* DataGrid.cs: move back to a more lazy scheme for creating the
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #include <unistd.h>
15
16 #include <mono/metadata/appdomain.h>
17 #include <mono/metadata/debug-helpers.h>
18 #include <mono/metadata/threads.h>
19 #include <mono/metadata/profiler-private.h>
20 #include <mono/utils/mono-math.h>
21
22 #include "trace.h"
23 #include "mini-x86.h"
24 #include "inssel.h"
25 #include "cpu-x86.h"
26
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
/* TRUE by default until we add runtime detection of Xen */
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

/* Round VAL up to the next multiple of ALIGN (ALIGN must be a power of two) */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

/* Offset from EBP to the first incoming argument: saved EBP + return address */
#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

/* Abort when reaching a code path that has not been implemented yet */
#define NOT_IMPLEMENTED g_assert_not_reached ()
51
52 const char*
53 mono_arch_regname (int reg) {
54         switch (reg) {
55         case X86_EAX: return "%eax";
56         case X86_EBX: return "%ebx";
57         case X86_ECX: return "%ecx";
58         case X86_EDX: return "%edx";
59         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
60         case X86_EDI: return "%edi";
61         case X86_ESI: return "%esi";
62         }
63         return "unknown";
64 }
65
/*
 * mono_arch_fregname:
 *
 *   Return the assembler name of the floating point register REG.  The
 * x87 stack registers are not individually named here, so every value
 * maps to "unknown".
 */
const char*
mono_arch_fregname (int reg) {
	return "unknown";
}
70
/* Where and how a single call argument or return value is passed */
typedef enum {
	ArgInIReg,		/* in an integer register */
	ArgInFloatSSEReg,	/* in an SSE register as single precision */
	ArgInDoubleSSEReg,	/* in an SSE register as double precision */
	ArgOnStack,		/* on the call stack */
	ArgValuetypeInReg,	/* small valuetype returned in registers; see ArgInfo.pair_storage */
	ArgOnFloatFpStack,	/* on the x87 fp stack, single precision */
	ArgOnDoubleFpStack,	/* on the x87 fp stack, double precision */
	ArgNone			/* no value (void return) */
} ArgStorage;
81
/* Describes how one argument or return value is passed */
typedef struct {
	gint16 offset;		/* stack offset, valid when storage == ArgOnStack */
	gint8  reg;		/* register number, valid for the in-register storages */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];	/* how each half of the value is returned */
	gint8 pair_regs [2];		/* register for each half when it is ArgInIReg */
} ArgInfo;
91
/* Aggregated calling-convention information for one method signature */
typedef struct {
	int nargs;
	guint32 stack_usage;		/* total stack space consumed by the arguments */
	guint32 reg_usage;		/* number of integer registers used */
	guint32 freg_usage;		/* number of float registers used */
	gboolean need_stack_align;	/* TRUE when alignment padding must be added */
	guint32 stack_align_amount;	/* number of padding bytes to add */
	ArgInfo ret;			/* how the return value is passed */
	ArgInfo sig_cookie;		/* location of the vararg signature cookie */
	ArgInfo args [1];		/* inline array; allocated with space for all arguments */
} CallInfo;
103
/*
 * The x86 calling conventions pass every argument on the stack: no
 * integer or floating point parameter registers are available.
 */
#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#ifdef PLATFORM_WIN32
/* Registers used on win32 to return small structs from pinvoke calls */
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
113
114 static void inline
115 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
116 {
117     ainfo->offset = *stack_size;
118
119     if (*gr >= PARAM_REGS) {
120                 ainfo->storage = ArgOnStack;
121                 (*stack_size) += sizeof (gpointer);
122     }
123     else {
124                 ainfo->storage = ArgInIReg;
125                 ainfo->reg = param_regs [*gr];
126                 (*gr) ++;
127     }
128 }
129
130 static void inline
131 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
132 {
133         ainfo->offset = *stack_size;
134
135         g_assert (PARAM_REGS == 0);
136         
137         ainfo->storage = ArgOnStack;
138         (*stack_size) += sizeof (gpointer) * 2;
139 }
140
141 static void inline
142 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
143 {
144     ainfo->offset = *stack_size;
145
146     if (*gr >= FLOAT_PARAM_REGS) {
147                 ainfo->storage = ArgOnStack;
148                 (*stack_size) += is_double ? 8 : 4;
149     }
150     else {
151                 /* A double register */
152                 if (is_double)
153                         ainfo->storage = ArgInDoubleSSEReg;
154                 else
155                         ainfo->storage = ArgInFloatSSEReg;
156                 ainfo->reg = *gr;
157                 (*gr) += 1;
158     }
159 }
160
161
/*
 * add_valuetype:
 *
 *   Decide how the valuetype TYPE is passed, or returned when IS_RETURN
 * is set.  Valuetypes normally go on the stack; on win32, small structs
 * returned from pinvoke calls come back in registers or on the fp stack.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
               gboolean is_return,
               guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* pinvoke signatures use the native (marshalled) size */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* Structs of 1, 2, 4 or 8 bytes are returned in return_regs [0]
		 * (plus return_regs [1] for the upper half of an 8 byte struct) */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* Default: passed on the stack, rounded up to pointer alignment */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
217
/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386 
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 *
 * The returned CallInfo is allocated with g_malloc0 () and must be freed
 * by the caller with g_free ().
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	/* One trailing ArgInfo per argument follows the CallInfo header */
	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mono_type_get_underlying_type (sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
				/* Reference-type generic instances are returned like objects */
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			;
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/* 
			 * Prevent implicit arguments + the sig cookie from being passed 
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			/* byref arguments are passed as a pointer */
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	/* Sig cookie for varargs whose sentinel sits after the last declared param */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

#if defined(__APPLE__)
	/* Darwin requires 16 byte stack alignment at call sites */
	if ((stack_size % 16) != 0) { 
		cinfo->need_stack_align = TRUE;
		stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
	}
#endif

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
414
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries. 
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;		/* saved EBP + return address (ARGS_OFFSET) */
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	/* A struct returned on the stack adds a hidden address argument */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* Entry 0 covers the implicit (this + hidden return) arguments */
	arg_info [0].size = frame_size;

	/* Entry k + 1 describes parameter k; pad is written one slot back */
	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else {
			int ialign;
			size = mono_type_stack_size (csig->params [k], &ialign);
			align = ialign;
		}

		/* ignore alignment for now */
		align = 1;

		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* Round the whole frame up to the platform frame alignment */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
482
/*
 * Hand-assembled cdecl thunk which executes CPUID and stores the four
 * result registers through the pointer arguments; its C signature is the
 * CpuidFunc typedef below.  Copied into executable memory by cpuid ().
 */
static const guchar cpuid_impl [] = {
	0x55,				/* push   %ebp */
	0x89, 0xe5,			/* mov    %esp,%ebp */
	0x53,				/* push   %ebx */
	0x8b, 0x45, 0x08,		/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,			/* cpuid   */
	0x50,				/* push   %eax */
	0x8b, 0x45, 0x10,		/* mov    0x10(%ebp),%eax */
	0x89, 0x18,			/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,		/* mov    0x14(%ebp),%eax */
	0x89, 0x08,			/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,		/* mov    0x18(%ebp),%eax */
	0x89, 0x10,			/* mov    %edx,(%eax) */
	0x58,				/* pop    %eax */
	0x8b, 0x55, 0x0c,		/* mov    0xc(%ebp),%edx */
	0x89, 0x02,			/* mov    %eax,(%edx) */
	0x5b,				/* pop    %ebx */
	0xc9,				/* leave   */
	0xc3,				/* ret     */
};

typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
505
/*
 * cpuid:
 *
 *   Execute the CPUID instruction with function ID and store the result
 * registers in *P_EAX, *P_EBX, *P_ECX and *P_EDX.  Returns 1 on success,
 * 0 when the cpu does not support CPUID.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
	/* CPUID is supported iff bit 21 (ID) of EFLAGS can be toggled */
#ifndef _MSC_VER
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
565
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	/* Set the x87 control word precision field to 53 bits (double) */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	/* MS runtime equivalent: 53 bit (double) precision */
	_control87 (_PC_53, MCW_PC);
#endif
}
585
586 /*
587  * This function returns the optimizations supported on this cpu.
588  */
589 guint32
590 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
591 {
592         int eax, ebx, ecx, edx;
593         guint32 opts = 0;
594         
595         *exclude_mask = 0;
596         /* Feature Flags function, flags returned in EDX. */
597         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
598                 if (edx & (1 << 15)) {
599                         opts |= MONO_OPT_CMOV;
600                         if (edx & 1)
601                                 opts |= MONO_OPT_FCMOV;
602                         else
603                                 *exclude_mask |= MONO_OPT_FCMOV;
604                 } else
605                         *exclude_mask |= MONO_OPT_CMOV;
606         }
607         return opts;
608 }
609
/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* 0xf7 with modrm reg field 7 and mod 3 is "idiv r32" */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG: fetch the divisor value from the saved context */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		/* A divisor of -1 means the fault was INT_MIN / -1, i.e.
		 * overflow rather than division by zero */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
658
659 static gboolean
660 is_regsize_var (MonoType *t) {
661         if (t->byref)
662                 return TRUE;
663         switch (mono_type_get_underlying_type (t)->type) {
664         case MONO_TYPE_I4:
665         case MONO_TYPE_U4:
666         case MONO_TYPE_I:
667         case MONO_TYPE_U:
668         case MONO_TYPE_PTR:
669         case MONO_TYPE_FNPTR:
670                 return TRUE;
671         case MONO_TYPE_OBJECT:
672         case MONO_TYPE_STRING:
673         case MONO_TYPE_CLASS:
674         case MONO_TYPE_SZARRAY:
675         case MONO_TYPE_ARRAY:
676                 return TRUE;
677         case MONO_TYPE_GENERICINST:
678                 if (!mono_type_generic_inst_is_valuetype (t))
679                         return TRUE;
680                 return FALSE;
681         case MONO_TYPE_VALUETYPE:
682                 return FALSE;
683         }
684         return FALSE;
685 }
686
/*
 * mono_arch_get_allocatable_int_vars:
 *
 *   Return the list of variables in CFG which are eligible for integer
 * register allocation, sorted with mono_varlist_sort ().
 */
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		/* skip vars pinned to memory and anything that is not a local or argument */
		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we don't allocate I1 to registers because there is no simple way to sign extend 
		 * 8bit quantities in caller saved registers on x86 */
		if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
		    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
		    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
720
721 GList *
722 mono_arch_get_global_int_regs (MonoCompile *cfg)
723 {
724         GList *regs = NULL;
725
726         /* we can use 3 registers for global allocation */
727         regs = g_list_prepend (regs, (gpointer)X86_EBX);
728         regs = g_list_prepend (regs, (gpointer)X86_ESI);
729         regs = g_list_prepend (regs, (gpointer)X86_EDI);
730
731         return regs;
732 }
733
734 /*
735  * mono_arch_regalloc_cost:
736  *
737  *  Return the cost, in number of memory references, of the action of 
738  * allocating the variable VMV into a register during global register
739  * allocation.
740  */
741 guint32
742 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
743 {
744         MonoInst *ins = cfg->varinfo [vmv->idx];
745
746         if (cfg->method->save_lmf)
747                 /* The register is already saved */
748                 return (ins->opcode == OP_ARG) ? 1 : 0;
749         else
750                 /* push+pop+possible load if it is an argument */
751                 return (ins->opcode == OP_ARG) ? 3 : 2;
752 }
753  
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 *
 * Frame layout (negative offsets from EBP): saved LMF or callee-saved
 * registers first, then an optional valuetype return slot, then locals.
 * Incoming arguments live at positive offsets (ARGS_OFFSET and up).
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (sig, FALSE);

	cfg->frame_reg = MONO_ARCH_BASEREG;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		/* 4 bytes for each callee-saved register actually used */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* Round offset up to the locals' alignment (a power of two) */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			/* locals live below the frame pointer */
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* The caller passed the address where the value is stored */
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->varinfo [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		/* incoming arguments live above the saved EBP + return address */
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	/* Round the frame up to MONO_ARCH_FRAME_ALIGNMENT */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	cfg->stack_offset = offset;

	g_free (cinfo);
}
872
873 void
874 mono_arch_create_vars (MonoCompile *cfg)
875 {
876         MonoMethodSignature *sig;
877         CallInfo *cinfo;
878
879         sig = mono_method_signature (cfg->method);
880
881         cinfo = get_call_info (sig, FALSE);
882
883         if (cinfo->ret.storage == ArgValuetypeInReg)
884                 cfg->ret_var_is_local = TRUE;
885
886         g_free (cinfo);
887 }
888
/* FIXME: We need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently the alignment in mono_arch_call_opcode is computed without arch_get_argument_info.
 */
892
/*
 * emit_sig_cookie:
 *
 *   Emit an OP_OUTARG which pushes the signature cookie required by managed
 * vararg calls. The cookie is a MonoMethodSignature* describing only the
 * implicit (trailing) arguments of CALL; it is prepended to call->out_args
 * like a regular outgoing argument.
 */
static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg;
	MonoMethodSignature *tmp_sig;
	MonoInst *sig_arg;

	/* FIXME: Add support for signature tokens to AOT */
	cfg->disable_aot = TRUE;
	MONO_INST_NEW (cfg, arg, OP_OUTARG);

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is 
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by 
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup (call->signature);
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	/* Copy only the implicit parameters (those at/after the sentinel) */
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	/* The cookie itself is an ICONST holding the trimmed signature pointer */
	MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
	sig_arg->inst_p0 = tmp_sig;

	arg->inst_left = sig_arg;
	arg->type = STACK_PTR;
	/* prepend, so they get reversed */
	arg->next = call->out_args;
	call->out_args = arg;
}
924
/* 
 * take the arguments and generate the arch-specific
 * instructions to properly call the function in call.
 * This includes pushing, moving arguments to the right register
 * etc.
 */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n;
	CallInfo *cinfo;
	int sentinelpos = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (sig, FALSE);

	/* For managed varargs, remember where the implicit (trailing) args start */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);

	for (i = 0; i < n; ++i) {
		ArgInfo *ainfo = cinfo->args + i;

		/* Emit the signature cookie just before the implicit arguments */
		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
			emit_sig_cookie (cfg, call);
		}

		if (is_virtual && i == 0) {
			/* the argument will be attached to the call instruction */
			in = call->args [i];
		} else {
			MonoType *t;

			/* The 'this' argument is treated as a plain pointer */
			if (i >= sig->hasthis)
				t = sig->params [i - sig->hasthis];
			else
				t = &mono_defaults.int_class->byval_arg;
			t = mono_type_get_underlying_type (t);

			/* Wrap the argument in an OP_OUTARG; prepending to out_args
			 * reverses the order relative to call->args. */
			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			in = call->args [i];
			arg->cil_code = in->cil_code;
			arg->inst_left = in;
			arg->type = in->type;
			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;

			if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
				guint32 size, align;

				/* Valuetype argument: compute its stack size (native
				 * layout rules for pinvoke) and mark it OP_OUTARG_VT. */
				if (t->type == MONO_TYPE_TYPEDBYREF) {
					size = sizeof (MonoTypedRef);
					align = sizeof (gpointer);
				}
				else
					if (sig->pinvoke)
						size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
					else {
						int ialign;
						size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
						align = ialign;
					}
				arg->opcode = OP_OUTARG_VT;
				arg->klass = in->klass;
				arg->backend.is_pinvoke = sig->pinvoke;
				arg->inst_imm = size; 
			}
			else {
				switch (ainfo->storage) {
				case ArgOnStack:
					arg->opcode = OP_OUTARG;
					/* R4/R8 values need float-specific push opcodes */
					if (!t->byref) {
						if (t->type == MONO_TYPE_R4)
							arg->opcode = OP_OUTARG_R4;
						else
							if (t->type == MONO_TYPE_R8)
								arg->opcode = OP_OUTARG_R8;
					}
					break;
				default:
					g_assert_not_reached ();
				}
			}
		}
	}

	/* Handle the case where there are no implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
		emit_sig_cookie (cfg, call);
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			MonoInst *zero_inst;
			/*
			 * After the call, the struct is in registers, but needs to be saved to the memory pointed
			 * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
			 * before calling the function. So we add a dummy instruction to represent pushing the 
			 * struct return address to the stack. The return address will be saved to this stack slot 
			 * by the code emitted in this_vret_args.
			 */
			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
			zero_inst->inst_p0 = 0;
			arg->inst_left = zero_inst;
			arg->type = STACK_PTR;
			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;
		}
		else
			/* if the function returns a struct, the called method already does a ret $0x4 */
			/* NOTE(review): this inner check merely repeats the enclosing
			 * condition and is always true at this point. */
			if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
				cinfo->stack_usage -= 4;
	}
	
	call->stack_usage = cinfo->stack_usage;

#if defined(__APPLE__)
	/* Extra stack alignment needed on Apple targets; the amount was
	 * computed by get_call_info. */
	if (cinfo->need_stack_align) {
		MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
		arg->inst_c0 = cinfo->stack_align_amount;
		arg->next = call->out_args;
		call->out_args = arg;
	}
#endif 

	g_free (cinfo);

	return call;
}
1060
/*
 * Allow tracing to work with this interface (with an optional argument)
 */

/*
 * mono_arch_instrument_prolog:
 *
 *   Emit code at method entry which calls FUNC with two stack arguments:
 * cfg->method (pushed last, so first argument) and the saved EBP. Both are
 * discarded again with the trailing `add $8, %esp`. Returns the updated
 * code pointer.
 */
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;

	/* if some args are passed in registers, we need to save them here */
	x86_push_reg (code, X86_EBP);

	/* In AOT mode the method/func pointers are emitted as immediates;
	 * otherwise patch infos are recorded so the addresses get fixed up later. */
	if (cfg->compile_aot) {
		x86_push_imm (code, cfg->method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
		x86_push_imm (code, cfg->method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	/* pop the two pushed arguments */
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);

	return code;
}
1086
/* How mono_arch_instrument_epilog must preserve the method's return value
 * around the call into the tracing function (see the switch on the return
 * type there). */
enum {
	SAVE_NONE,	/* void return: nothing to preserve */
	SAVE_STRUCT,	/* valuetype return: [EBP+8] is pushed as argument (presumably the vtype return address — TODO confirm) */
	SAVE_EAX,	/* 32 bit integer/pointer result in EAX */
	SAVE_EAX_EDX,	/* 64 bit (I8/U8) result in the EAX:EDX pair */
	SAVE_FP		/* R4/R8 result on the x87 register stack */
};
1094
1095 void*
1096 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1097 {
1098         guchar *code = p;
1099         int arg_size = 0, save_mode = SAVE_NONE;
1100         MonoMethod *method = cfg->method;
1101         
1102         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1103         case MONO_TYPE_VOID:
1104                 /* special case string .ctor icall */
1105                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1106                         save_mode = SAVE_EAX;
1107                 else
1108                         save_mode = SAVE_NONE;
1109                 break;
1110         case MONO_TYPE_I8:
1111         case MONO_TYPE_U8:
1112                 save_mode = SAVE_EAX_EDX;
1113                 break;
1114         case MONO_TYPE_R4:
1115         case MONO_TYPE_R8:
1116                 save_mode = SAVE_FP;
1117                 break;
1118         case MONO_TYPE_GENERICINST:
1119                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1120                         save_mode = SAVE_EAX;
1121                         break;
1122                 }
1123                 /* Fall through */
1124         case MONO_TYPE_VALUETYPE:
1125                 save_mode = SAVE_STRUCT;
1126                 break;
1127         default:
1128                 save_mode = SAVE_EAX;
1129                 break;
1130         }
1131
1132         switch (save_mode) {
1133         case SAVE_EAX_EDX:
1134                 x86_push_reg (code, X86_EDX);
1135                 x86_push_reg (code, X86_EAX);
1136                 if (enable_arguments) {
1137                         x86_push_reg (code, X86_EDX);
1138                         x86_push_reg (code, X86_EAX);
1139                         arg_size = 8;
1140                 }
1141                 break;
1142         case SAVE_EAX:
1143                 x86_push_reg (code, X86_EAX);
1144                 if (enable_arguments) {
1145                         x86_push_reg (code, X86_EAX);
1146                         arg_size = 4;
1147                 }
1148                 break;
1149         case SAVE_FP:
1150                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1151                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1152                 if (enable_arguments) {
1153                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1154                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1155                         arg_size = 8;
1156                 }
1157                 break;
1158         case SAVE_STRUCT:
1159                 if (enable_arguments) {
1160                         x86_push_membase (code, X86_EBP, 8);
1161                         arg_size = 4;
1162                 }
1163                 break;
1164         case SAVE_NONE:
1165         default:
1166                 break;
1167         }
1168
1169         if (cfg->compile_aot) {
1170                 x86_push_imm (code, method);
1171                 x86_mov_reg_imm (code, X86_EAX, func);
1172                 x86_call_reg (code, X86_EAX);
1173         } else {
1174                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1175                 x86_push_imm (code, method);
1176                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1177                 x86_call_code (code, 0);
1178         }
1179         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1180
1181         switch (save_mode) {
1182         case SAVE_EAX_EDX:
1183                 x86_pop_reg (code, X86_EAX);
1184                 x86_pop_reg (code, X86_EDX);
1185                 break;
1186         case SAVE_EAX:
1187                 x86_pop_reg (code, X86_EAX);
1188                 break;
1189         case SAVE_FP:
1190                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1191                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1192                 break;
1193         case SAVE_NONE:
1194         default:
1195                 break;
1196         }
1197
1198         return code;
1199 }
1200
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch to the target of INS (a label when
 * MONO_INST_BRLABEL is set, otherwise a basic block). If the target has
 * already been emitted, branch to it directly; otherwise record a patch and
 * emit an 8 bit displacement when MONO_OPT_BRANCH estimates it fits, else a
 * 32 bit one.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1225
/*  
 *	Emit code which throws the EXC_NAME exception when COND fails. When
 *  the exception target can be resolved to an already-planned basic block
 *  (mono_branch_optimize_exception_target), branch to it directly instead
 *  of emitting a patched throw.
 *
 *  NOTE(review): the body ends with `;` after the do/while (0), which defeats
 *  the single-statement idiom — using this macro in an unbraced if/else would
 *  not compile. Left as-is because call sites may rely on the current form.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
                if (tins == NULL) {                                                                             \
                        mono_add_patch_info (cfg, code - cfg->native_code,   \
                                        MONO_PATCH_INFO_EXC, exc_name);  \
                        x86_branch32 (code, cond, 0, signed);               \
                } else {        \
                        EMIT_COND_BRANCH (tins, cond, signed);  \
                }                       \
        } while (0); 
1241
/* Compare and pop the two topmost x87 registers (fcompp), then store the FPU
 * status word into AX (fnstsw) so the following code can test the condition
 * bits with integer instructions. */
#define EMIT_FPCOMPARE(code) do { \
        x86_fcompp (code); \
        x86_fnstsw (code); \
} while (0); 
1246
1247
/*
 * emit_call:
 *
 *   Emit a call whose target is not known yet: record a patch entry of
 * PATCH_TYPE with DATA at the current native offset, then emit a call with a
 * zero displacement for the patcher to fill in later. Returns the updated
 * code pointer.
 */
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}
1256
/* FIXME: Add more instructions */
/* TRUE when INS does not depend on the x86 condition flags, so a preceding
 * instruction is free to clobber them (used by the ICONST -> XOR rewrite in
 * peephole_pass). */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1259
1260 static void
1261 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1262 {
1263         MonoInst *ins, *last_ins = NULL;
1264         ins = bb->code;
1265
1266         while (ins) {
1267
1268                 switch (ins->opcode) {
1269                 case OP_ICONST:
1270                         /* reg = 0 -> XOR (reg, reg) */
1271                         /* XOR sets cflags on x86, so we cant do it always */
1272                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1273                                 ins->opcode = CEE_XOR;
1274                                 ins->sreg1 = ins->dreg;
1275                                 ins->sreg2 = ins->dreg;
1276                         }
1277                         break;
1278                 case OP_MUL_IMM: 
1279                         /* remove unnecessary multiplication with 1 */
1280                         if (ins->inst_imm == 1) {
1281                                 if (ins->dreg != ins->sreg1) {
1282                                         ins->opcode = OP_MOVE;
1283                                 } else {
1284                                         last_ins->next = ins->next;
1285                                         ins = ins->next;
1286                                         continue;
1287                                 }
1288                         }
1289                         break;
1290                 case OP_COMPARE_IMM:
1291                         /* OP_COMPARE_IMM (reg, 0) 
1292                          * --> 
1293                          * OP_X86_TEST_NULL (reg) 
1294                          */
1295                         if (!ins->inst_imm)
1296                                 ins->opcode = OP_X86_TEST_NULL;
1297                         break;
1298                 case OP_X86_COMPARE_MEMBASE_IMM:
1299                         /* 
1300                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1301                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1302                          * -->
1303                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1304                          * OP_COMPARE_IMM reg, imm
1305                          *
1306                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1307                          */
1308                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1309                             ins->inst_basereg == last_ins->inst_destbasereg &&
1310                             ins->inst_offset == last_ins->inst_offset) {
1311                                         ins->opcode = OP_COMPARE_IMM;
1312                                         ins->sreg1 = last_ins->sreg1;
1313
1314                                         /* check if we can remove cmp reg,0 with test null */
1315                                         if (!ins->inst_imm)
1316                                                 ins->opcode = OP_X86_TEST_NULL;
1317                                 }
1318
1319                         break;
1320                 case OP_LOAD_MEMBASE:
1321                 case OP_LOADI4_MEMBASE:
1322                         /* 
1323                          * Note: if reg1 = reg2 the load op is removed
1324                          *
1325                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1326                          * OP_LOAD_MEMBASE offset(basereg), reg2
1327                          * -->
1328                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1329                          * OP_MOVE reg1, reg2
1330                          */
1331                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1332                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1333                             ins->inst_basereg == last_ins->inst_destbasereg &&
1334                             ins->inst_offset == last_ins->inst_offset) {
1335                                 if (ins->dreg == last_ins->sreg1) {
1336                                         last_ins->next = ins->next;                             
1337                                         ins = ins->next;                                
1338                                         continue;
1339                                 } else {
1340                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1341                                         ins->opcode = OP_MOVE;
1342                                         ins->sreg1 = last_ins->sreg1;
1343                                 }
1344
1345                         /* 
1346                          * Note: reg1 must be different from the basereg in the second load
1347                          * Note: if reg1 = reg2 is equal then second load is removed
1348                          *
1349                          * OP_LOAD_MEMBASE offset(basereg), reg1
1350                          * OP_LOAD_MEMBASE offset(basereg), reg2
1351                          * -->
1352                          * OP_LOAD_MEMBASE offset(basereg), reg1
1353                          * OP_MOVE reg1, reg2
1354                          */
1355                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1356                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1357                               ins->inst_basereg != last_ins->dreg &&
1358                               ins->inst_basereg == last_ins->inst_basereg &&
1359                               ins->inst_offset == last_ins->inst_offset) {
1360
1361                                 if (ins->dreg == last_ins->dreg) {
1362                                         last_ins->next = ins->next;                             
1363                                         ins = ins->next;                                
1364                                         continue;
1365                                 } else {
1366                                         ins->opcode = OP_MOVE;
1367                                         ins->sreg1 = last_ins->dreg;
1368                                 }
1369
1370                                 //g_assert_not_reached ();
1371
1372 #if 0
1373                         /* 
1374                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1375                          * OP_LOAD_MEMBASE offset(basereg), reg
1376                          * -->
1377                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1378                          * OP_ICONST reg, imm
1379                          */
1380                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1381                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1382                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1383                                    ins->inst_offset == last_ins->inst_offset) {
1384                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1385                                 ins->opcode = OP_ICONST;
1386                                 ins->inst_c0 = last_ins->inst_imm;
1387                                 g_assert_not_reached (); // check this rule
1388 #endif
1389                         }
1390                         break;
1391                 case OP_LOADU1_MEMBASE:
1392                 case OP_LOADI1_MEMBASE:
1393                         /* 
1394                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1395                          * OP_LOAD_MEMBASE offset(basereg), reg2
1396                          * -->
1397                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1398                          * CONV_I2/U2 reg1, reg2
1399                          */
1400                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1401                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1402                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1403                                         ins->inst_offset == last_ins->inst_offset) {
1404                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1405                                 ins->sreg1 = last_ins->sreg1;
1406                         }
1407                         break;
1408                 case OP_LOADU2_MEMBASE:
1409                 case OP_LOADI2_MEMBASE:
1410                         /* 
1411                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1412                          * OP_LOAD_MEMBASE offset(basereg), reg2
1413                          * -->
1414                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1415                          * CONV_I2/U2 reg1, reg2
1416                          */
1417                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1418                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1419                                         ins->inst_offset == last_ins->inst_offset) {
1420                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1421                                 ins->sreg1 = last_ins->sreg1;
1422                         }
1423                         break;
1424                 case CEE_CONV_I4:
1425                 case CEE_CONV_U4:
1426                 case OP_MOVE:
1427                         /*
1428                          * Removes:
1429                          *
1430                          * OP_MOVE reg, reg 
1431                          */
1432                         if (ins->dreg == ins->sreg1) {
1433                                 if (last_ins)
1434                                         last_ins->next = ins->next;                             
1435                                 ins = ins->next;
1436                                 continue;
1437                         }
1438                         /* 
1439                          * Removes:
1440                          *
1441                          * OP_MOVE sreg, dreg 
1442                          * OP_MOVE dreg, sreg
1443                          */
1444                         if (last_ins && last_ins->opcode == OP_MOVE &&
1445                             ins->sreg1 == last_ins->dreg &&
1446                             ins->dreg == last_ins->sreg1) {
1447                                 last_ins->next = ins->next;                             
1448                                 ins = ins->next;                                
1449                                 continue;
1450                         }
1451                         break;
1452                         
1453                 case OP_X86_PUSH_MEMBASE:
1454                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1455                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1456                             ins->inst_basereg == last_ins->inst_destbasereg &&
1457                             ins->inst_offset == last_ins->inst_offset) {
1458                                     ins->opcode = OP_X86_PUSH;
1459                                     ins->sreg1 = last_ins->sreg1;
1460                         }
1461                         break;
1462                 }
1463                 last_ins = ins;
1464                 ins = ins->next;
1465         }
1466         bb->last_ins = last_ins;
1467 }
1468
/* Mapping from the JIT's conditional-branch opcodes to x86 condition codes.
 * NOTE(review): the two EQ/GE/GT/LE/LT rows presumably correspond to the
 * signed and unsigned branch opcode ranges, followed by overflow/carry —
 * confirm against the users of this table. */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Per-opcode instruction descriptions (x86_desc from cpu-x86.h) used by the
 * local register allocator. */
static const char*const * ins_spec = x86_desc;
1477
/*#include "cprop.c"*/

/*
 * mono_arch_local_regalloc:
 *
 *   Perform local (per basic block) register allocation on BB; on x86 this
 * simply delegates to the generic local allocator.
 */
void
mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
{
	mono_local_regalloc (cfg, bb);
}
1484
/*
 * emit_float_to_int:
 *
 *   Emit code converting the value on top of the x87 stack to an integer of
 * SIZE bytes in DREG. The FPU control word is saved, rounding control is
 * forced to truncation (OR with 0xc00), the value is stored with fistp, and
 * the original control word is restored. For sizes 1 and 2 the result is
 * additionally widened/truncated with the requested signedness.
 *
 * NOTE(review): in the size == 8 path only the low 32 bits are popped into
 * DREG and the high dword is left on the stack (see the FIXME below), so the
 * trailing fldcw/add appear to operate 4 bytes off — confirm callers handle
 * the 64 bit case elsewhere or finish the dreg_high handling.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
	/* Save the current FPU control word and set round-towards-zero */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* Restore the original control word and release its stack slot */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
1515
/*
 * mono_emit_stack_alloc:
 *
 *   Emit native code implementing localloc: grow the stack by the byte count
 * held in tree->sreg1. On platforms with a guard-page based stack (Windows,
 * or when SIGSEGV is handled on an alternate stack) the allocation is done in
 * 0x1000 byte steps, touching the stack after each step so the OS can commit
 * the next page. If MONO_INST_INIT is set, the allocated area is zeroed with
 * rep stosl. Note: sreg is clobbered by the emitted code.
 *
 * Returns: a pointer to the end of the emitted code.
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
        int sreg = tree->sreg1;        /* register holding the allocation size in bytes */
        int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
        need_touch = TRUE;
#endif

        if (need_touch) {
                guint8* br[5];

                /*
                 * Under Windows:
                 * If requested stack size is larger than one page,
                 * perform stack-touch operation
                 */
                /*
                 * Generate stack probe code.
                 * Under Windows, it is necessary to allocate one page at a time,
                 * "touching" stack after each successful sub-allocation. This is
                 * because of the way stack growth is implemented - there is a
                 * guard page before the lowest stack page that is currently commited.
                 * Stack normally grows sequentially so OS traps access to the
                 * guard page and commits more pages when needed.
                 */
                /* Sizes below one page need no probing; skip straight to the tail */
                x86_test_reg_imm (code, sreg, ~0xFFF);
                br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

                br[2] = code; /* loop */
                x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
                /* "touch" the newly exposed page so the guard page faults in order */
                x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

                /* 
                 * By the end of the loop, sreg is smaller than 0x1000, so the init routine
                 * that follows only initializes the last part of the area.
                 */
                /* Same as the init code below with size==0x1000 */
                if (tree->flags & MONO_INST_INIT) {
                        x86_push_reg (code, X86_EAX);
                        x86_push_reg (code, X86_ECX);
                        x86_push_reg (code, X86_EDI);
                        /* ECX = dword count for rep stosl */
                        x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
                        x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
                        /* EDI = start of the freshly allocated page (skip the 3 saved regs) */
                        x86_lea_membase (code, X86_EDI, X86_ESP, 12);
                        x86_cld (code);
                        x86_prefix (code, X86_REP_PREFIX);
                        x86_stosl (code);
                        x86_pop_reg (code, X86_EDI);
                        x86_pop_reg (code, X86_ECX);
                        x86_pop_reg (code, X86_EAX);
                }

                /* Loop while at least one full page remains to allocate */
                x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
                x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
                br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
                x86_patch (br[3], br[2]);
                /* Allocate the sub-page remainder, if any */
                x86_test_reg_reg (code, sreg, sreg);
                br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
                x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

                br[1] = code; x86_jump8 (code, 0);

                /* Fast path: size < one page, single untouched adjustment */
                x86_patch (br[0], code);
                x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
                x86_patch (br[1], code);
                x86_patch (br[4], code);
        }
        else
                x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

        if (tree->flags & MONO_INST_INIT) {
                int offset = 0;
                /* Save caller-visible regs clobbered by rep stosl, unless they are
                 * the destination/size regs (whose values we may still need or own) */
                if (tree->dreg != X86_EAX && sreg != X86_EAX) {
                        x86_push_reg (code, X86_EAX);
                        offset += 4;
                }
                if (tree->dreg != X86_ECX && sreg != X86_ECX) {
                        x86_push_reg (code, X86_ECX);
                        offset += 4;
                }
                if (tree->dreg != X86_EDI && sreg != X86_EDI) {
                        x86_push_reg (code, X86_EDI);
                        offset += 4;
                }
                
                /* Convert byte count to dword count for rep stosl */
                x86_shift_reg_imm (code, X86_SHR, sreg, 2);
                if (sreg != X86_ECX)
                        x86_mov_reg_reg (code, X86_ECX, sreg, 4);
                x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
                                
                /* EDI = start of the allocated area, past the regs saved above */
                x86_lea_membase (code, X86_EDI, X86_ESP, offset);
                x86_cld (code);
                x86_prefix (code, X86_REP_PREFIX);
                x86_stosl (code);
                
                if (tree->dreg != X86_EDI && sreg != X86_EDI)
                        x86_pop_reg (code, X86_EDI);
                if (tree->dreg != X86_ECX && sreg != X86_ECX)
                        x86_pop_reg (code, X86_ECX);
                if (tree->dreg != X86_EAX && sreg != X86_EAX)
                        x86_pop_reg (code, X86_EAX);
        }
        return code;
}
1622
1623
1624 static guint8*
1625 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1626 {
1627         CallInfo *cinfo;
1628         int quad;
1629
1630         /* Move return value to the target register */
1631         switch (ins->opcode) {
1632         case CEE_CALL:
1633         case OP_CALL_REG:
1634         case OP_CALL_MEMBASE:
1635                 if (ins->dreg != X86_EAX)
1636                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1637                 break;
1638         case OP_VCALL:
1639         case OP_VCALL_REG:
1640         case OP_VCALL_MEMBASE:
1641                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
1642                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1643                         /* Pop the destination address from the stack */
1644                         x86_pop_reg (code, X86_ECX);
1645                         
1646                         for (quad = 0; quad < 2; quad ++) {
1647                                 switch (cinfo->ret.pair_storage [quad]) {
1648                                 case ArgInIReg:
1649                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1650                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1651                                         break;
1652                                 case ArgNone:
1653                                         break;
1654                                 default:
1655                                         g_assert_not_reached ();
1656                                 }
1657                         }
1658                 }
1659                 g_free (cinfo);
1660         default:
1661                 break;
1662         }
1663
1664         return code;
1665 }
1666
1667 /*
1668  * emit_tls_get:
1669  * @code: buffer to store code to
1670  * @dreg: hard register where to place the result
1671  * @tls_offset: offset info
1672  *
1673  * emit_tls_get emits in @code the native code that puts in the dreg register
1674  * the item in the thread local storage identified by tls_offset.
1675  *
1676  * Returns: a pointer to the end of the stored code
1677  */
1678 static guint8*
1679 emit_tls_get (guint8* code, int dreg, int tls_offset)
1680 {
1681 #ifdef PLATFORM_WIN32
1682         /* 
1683          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1684          * Journal and/or a disassembly of the TlsGet () function.
1685          */
1686         g_assert (tls_offset < 64);
1687         x86_prefix (code, X86_FS_PREFIX);
1688         x86_mov_reg_mem (code, dreg, 0x18, 4);
1689         /* Dunno what this does but TlsGetValue () contains it */
1690         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
1691         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
1692 #else
1693         if (optimize_for_xen) {
1694                 x86_prefix (code, X86_GS_PREFIX);
1695                 x86_mov_reg_mem (code, dreg, 0, 4);
1696                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
1697         } else {
1698                 x86_prefix (code, X86_GS_PREFIX);
1699                 x86_mov_reg_mem (code, dreg, tls_offset, 4);
1700         }
1701 #endif
1702         return code;
1703 }
1704
1705 /*
1706  * emit_load_volatile_arguments:
1707  *
1708  *  Load volatile arguments from the stack to the original input registers.
1709  * Required before a tail call.
1710  */
1711 static guint8*
1712 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
1713 {
1714         MonoMethod *method = cfg->method;
1715         MonoMethodSignature *sig;
1716         MonoInst *inst;
1717         CallInfo *cinfo;
1718         guint32 i;
1719
1720         /* FIXME: Generate intermediate code instead */
1721
1722         sig = mono_method_signature (method);
1723
1724         cinfo = get_call_info (sig, FALSE);
1725         
1726         /* This is the opposite of the code in emit_prolog */
1727
1728         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1729                 ArgInfo *ainfo = cinfo->args + i;
1730                 MonoType *arg_type;
1731                 inst = cfg->varinfo [i];
1732
1733                 if (sig->hasthis && (i == 0))
1734                         arg_type = &mono_defaults.object_class->byval_arg;
1735                 else
1736                         arg_type = sig->params [i - sig->hasthis];
1737
1738                 /*
1739                  * On x86, the arguments are either in their original stack locations, or in
1740                  * global regs.
1741                  */
1742                 if (inst->opcode == OP_REGVAR) {
1743                         g_assert (ainfo->storage == ArgOnStack);
1744                         
1745                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
1746                 }
1747         }
1748
1749         g_free (cinfo);
1750
1751         return code;
1752 }
1753
/*
 * REAL_PRINT_REG:
 *
 *   Debugging helper. Emits code that, at runtime, calls
 * printf (text " %d %p\n", reg, <value of reg>), saving and restoring the
 * caller-saved registers EAX/EDX/ECX around the call. The three printf
 * arguments are popped by adjusting ESP by 3*4 after the call.
 * NOTE: the emitted ADD clobbers EFLAGS.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
1768
/* benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
/* A block starts a loop when it opens a loop body and has non-zero nesting depth */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
1772
1773 void
1774 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1775 {
1776         MonoInst *ins;
1777         MonoCallInst *call;
1778         guint offset;
1779         guint8 *code = cfg->native_code + cfg->code_len;
1780         MonoInst *last_ins = NULL;
1781         guint last_offset = 0;
1782         int max_len, cpos;
1783
1784         if (cfg->opt & MONO_OPT_PEEPHOLE)
1785                 peephole_pass (cfg, bb);
1786
1787         if (cfg->opt & MONO_OPT_LOOP) {
1788                 int pad, align = LOOP_ALIGNMENT;
1789                 /* set alignment depending on cpu */
1790                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
1791                         pad = align - pad;
1792                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
1793                         x86_padding (code, pad);
1794                         cfg->code_len += pad;
1795                         bb->native_offset = cfg->code_len;
1796                 }
1797         }
1798
1799         if (cfg->verbose_level > 2)
1800                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1801
1802         cpos = bb->max_offset;
1803
1804         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1805                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1806                 g_assert (!cfg->compile_aot);
1807                 cpos += 6;
1808
1809                 cov->data [bb->dfn].cil_code = bb->cil_code;
1810                 /* this is not thread save, but good enough */
1811                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1812         }
1813
1814         offset = code - cfg->native_code;
1815
1816         mono_debug_open_block (cfg, bb, offset);
1817
1818         ins = bb->code;
1819         while (ins) {
1820                 offset = code - cfg->native_code;
1821
1822                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
1823
1824                 if (offset > (cfg->code_size - max_len - 16)) {
1825                         cfg->code_size *= 2;
1826                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1827                         code = cfg->native_code + offset;
1828                         mono_jit_stats.code_reallocs++;
1829                 }
1830
1831                 mono_debug_record_line_number (cfg, ins, offset);
1832
1833                 switch (ins->opcode) {
1834                 case OP_BIGMUL:
1835                         x86_mul_reg (code, ins->sreg2, TRUE);
1836                         break;
1837                 case OP_BIGMUL_UN:
1838                         x86_mul_reg (code, ins->sreg2, FALSE);
1839                         break;
1840                 case OP_X86_SETEQ_MEMBASE:
1841                 case OP_X86_SETNE_MEMBASE:
1842                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
1843                                          ins->inst_basereg, ins->inst_offset, TRUE);
1844                         break;
1845                 case OP_STOREI1_MEMBASE_IMM:
1846                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1847                         break;
1848                 case OP_STOREI2_MEMBASE_IMM:
1849                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1850                         break;
1851                 case OP_STORE_MEMBASE_IMM:
1852                 case OP_STOREI4_MEMBASE_IMM:
1853                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1854                         break;
1855                 case OP_STOREI1_MEMBASE_REG:
1856                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1857                         break;
1858                 case OP_STOREI2_MEMBASE_REG:
1859                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1860                         break;
1861                 case OP_STORE_MEMBASE_REG:
1862                 case OP_STOREI4_MEMBASE_REG:
1863                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1864                         break;
1865                 case CEE_LDIND_I:
1866                 case CEE_LDIND_I4:
1867                 case CEE_LDIND_U4:
1868                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1869                         break;
1870                 case OP_LOADU4_MEM:
1871                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1872                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1873                         break;
1874                 case OP_LOAD_MEMBASE:
1875                 case OP_LOADI4_MEMBASE:
1876                 case OP_LOADU4_MEMBASE:
1877                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1878                         break;
1879                 case OP_LOADU1_MEMBASE:
1880                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1881                         break;
1882                 case OP_LOADI1_MEMBASE:
1883                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1884                         break;
1885                 case OP_LOADU2_MEMBASE:
1886                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1887                         break;
1888                 case OP_LOADI2_MEMBASE:
1889                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1890                         break;
1891                 case CEE_CONV_I1:
1892                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1893                         break;
1894                 case CEE_CONV_I2:
1895                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1896                         break;
1897                 case CEE_CONV_U1:
1898                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1899                         break;
1900                 case CEE_CONV_U2:
1901                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1902                         break;
1903                 case OP_COMPARE:
1904                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1905                         break;
1906                 case OP_COMPARE_IMM:
1907                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1908                         break;
1909                 case OP_X86_COMPARE_MEMBASE_REG:
1910                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1911                         break;
1912                 case OP_X86_COMPARE_MEMBASE_IMM:
1913                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1914                         break;
1915                 case OP_X86_COMPARE_MEMBASE8_IMM:
1916                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1917                         break;
1918                 case OP_X86_COMPARE_REG_MEMBASE:
1919                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
1920                         break;
1921                 case OP_X86_COMPARE_MEM_IMM:
1922                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
1923                         break;
1924                 case OP_X86_TEST_NULL:
1925                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
1926                         break;
1927                 case OP_X86_ADD_MEMBASE_IMM:
1928                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1929                         break;
1930                 case OP_X86_ADD_MEMBASE:
1931                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
1932                         break;
1933                 case OP_X86_SUB_MEMBASE_IMM:
1934                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1935                         break;
1936                 case OP_X86_SUB_MEMBASE:
1937                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
1938                         break;
1939                 case OP_X86_AND_MEMBASE_IMM:
1940                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1941                         break;
1942                 case OP_X86_OR_MEMBASE_IMM:
1943                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1944                         break;
1945                 case OP_X86_XOR_MEMBASE_IMM:
1946                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1947                         break;
1948                 case OP_X86_INC_MEMBASE:
1949                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
1950                         break;
1951                 case OP_X86_INC_REG:
1952                         x86_inc_reg (code, ins->dreg);
1953                         break;
1954                 case OP_X86_DEC_MEMBASE:
1955                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
1956                         break;
1957                 case OP_X86_DEC_REG:
1958                         x86_dec_reg (code, ins->dreg);
1959                         break;
1960                 case OP_X86_MUL_MEMBASE:
1961                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
1962                         break;
1963                 case CEE_BREAK:
1964                         x86_breakpoint (code);
1965                         break;
1966                 case OP_ADDCC:
1967                 case CEE_ADD:
1968                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
1969                         break;
1970                 case OP_ADC:
1971                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
1972                         break;
1973                 case OP_ADDCC_IMM:
1974                 case OP_ADD_IMM:
1975                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
1976                         break;
1977                 case OP_ADC_IMM:
1978                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
1979                         break;
1980                 case OP_SUBCC:
1981                 case CEE_SUB:
1982                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
1983                         break;
1984                 case OP_SBB:
1985                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
1986                         break;
1987                 case OP_SUBCC_IMM:
1988                 case OP_SUB_IMM:
1989                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
1990                         break;
1991                 case OP_SBB_IMM:
1992                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
1993                         break;
1994                 case CEE_AND:
1995                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
1996                         break;
1997                 case OP_AND_IMM:
1998                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
1999                         break;
2000                 case CEE_DIV:
2001                         x86_cdq (code);
2002                         x86_div_reg (code, ins->sreg2, TRUE);
2003                         break;
2004                 case CEE_DIV_UN:
2005                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2006                         x86_div_reg (code, ins->sreg2, FALSE);
2007                         break;
2008                 case OP_DIV_IMM:
2009                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2010                         x86_cdq (code);
2011                         x86_div_reg (code, ins->sreg2, TRUE);
2012                         break;
2013                 case CEE_REM:
2014                         x86_cdq (code);
2015                         x86_div_reg (code, ins->sreg2, TRUE);
2016                         break;
2017                 case CEE_REM_UN:
2018                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2019                         x86_div_reg (code, ins->sreg2, FALSE);
2020                         break;
2021                 case OP_REM_IMM:
2022                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2023                         x86_cdq (code);
2024                         x86_div_reg (code, ins->sreg2, TRUE);
2025                         break;
2026                 case CEE_OR:
2027                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2028                         break;
2029                 case OP_OR_IMM:
2030                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2031                         break;
2032                 case CEE_XOR:
2033                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2034                         break;
2035                 case OP_XOR_IMM:
2036                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2037                         break;
2038                 case CEE_SHL:
2039                         g_assert (ins->sreg2 == X86_ECX);
2040                         x86_shift_reg (code, X86_SHL, ins->dreg);
2041                         break;
2042                 case CEE_SHR:
2043                         g_assert (ins->sreg2 == X86_ECX);
2044                         x86_shift_reg (code, X86_SAR, ins->dreg);
2045                         break;
2046                 case OP_SHR_IMM:
2047                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2048                         break;
2049                 case OP_SHR_UN_IMM:
2050                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2051                         break;
2052                 case CEE_SHR_UN:
2053                         g_assert (ins->sreg2 == X86_ECX);
2054                         x86_shift_reg (code, X86_SHR, ins->dreg);
2055                         break;
2056                 case OP_SHL_IMM:
2057                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2058                         break;
2059                 case OP_LSHL: {
2060                         guint8 *jump_to_end;
2061
2062                         /* handle shifts below 32 bits */
2063                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2064                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2065
2066                         x86_test_reg_imm (code, X86_ECX, 32);
2067                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2068
2069                         /* handle shift over 32 bit */
2070                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2071                         x86_clear_reg (code, ins->sreg1);
2072                         
2073                         x86_patch (jump_to_end, code);
2074                         }
2075                         break;
2076                 case OP_LSHR: {
2077                         guint8 *jump_to_end;
2078
2079                         /* handle shifts below 32 bits */
2080                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2081                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2082
2083                         x86_test_reg_imm (code, X86_ECX, 32);
2084                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2085
2086                         /* handle shifts over 31 bits */
2087                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2088                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2089                         
2090                         x86_patch (jump_to_end, code);
2091                         }
2092                         break;
2093                 case OP_LSHR_UN: {
2094                         guint8 *jump_to_end;
2095
2096                         /* handle shifts below 32 bits */
2097                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2098                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2099
2100                         x86_test_reg_imm (code, X86_ECX, 32);
2101                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2102
2103                         /* handle shifts over 31 bits */
2104                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2105                         x86_clear_reg (code, ins->backend.reg3);
2106                         
2107                         x86_patch (jump_to_end, code);
2108                         }
2109                         break;
2110                 case OP_LSHL_IMM:
2111                         if (ins->inst_imm >= 32) {
2112                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2113                                 x86_clear_reg (code, ins->sreg1);
2114                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2115                         } else {
2116                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2117                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2118                         }
2119                         break;
2120                 case OP_LSHR_IMM:
2121                         if (ins->inst_imm >= 32) {
2122                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2123                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2124                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2125                         } else {
2126                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2127                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2128                         }
2129                         break;
2130                 case OP_LSHR_UN_IMM:
2131                         if (ins->inst_imm >= 32) {
2132                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2133                                 x86_clear_reg (code, ins->backend.reg3);
2134                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2135                         } else {
2136                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2137                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2138                         }
2139                         break;
2140                 case CEE_NOT:
2141                         x86_not_reg (code, ins->sreg1);
2142                         break;
2143                 case CEE_NEG:
2144                         x86_neg_reg (code, ins->sreg1);
2145                         break;
2146                 case OP_SEXT_I1:
2147                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2148                         break;
2149                 case OP_SEXT_I2:
2150                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2151                         break;
2152                 case CEE_MUL:
2153                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2154                         break;
2155                 case OP_MUL_IMM:
2156                         switch (ins->inst_imm) {
2157                         case 2:
2158                                 /* MOV r1, r2 */
2159                                 /* ADD r1, r1 */
2160                                 if (ins->dreg != ins->sreg1)
2161                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2162                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2163                                 break;
2164                         case 3:
2165                                 /* LEA r1, [r2 + r2*2] */
2166                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2167                                 break;
2168                         case 5:
2169                                 /* LEA r1, [r2 + r2*4] */
2170                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2171                                 break;
2172                         case 6:
2173                                 /* LEA r1, [r2 + r2*2] */
2174                                 /* ADD r1, r1          */
2175                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2176                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2177                                 break;
2178                         case 9:
2179                                 /* LEA r1, [r2 + r2*8] */
2180                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2181                                 break;
2182                         case 10:
2183                                 /* LEA r1, [r2 + r2*4] */
2184                                 /* ADD r1, r1          */
2185                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2186                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2187                                 break;
2188                         case 12:
2189                                 /* LEA r1, [r2 + r2*2] */
2190                                 /* SHL r1, 2           */
2191                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2192                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2193                                 break;
2194                         case 25:
2195                                 /* LEA r1, [r2 + r2*4] */
2196                                 /* LEA r1, [r1 + r1*4] */
2197                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2198                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2199                                 break;
2200                         case 100:
2201                                 /* LEA r1, [r2 + r2*4] */
2202                                 /* SHL r1, 2           */
2203                                 /* LEA r1, [r1 + r1*4] */
2204                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2205                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2206                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2207                                 break;
2208                         default:
2209                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2210                                 break;
2211                         }
2212                         break;
2213                 case CEE_MUL_OVF:
2214                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2215                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2216                         break;
2217                 case CEE_MUL_OVF_UN: {
2218                         /* the mul operation and the exception check should most likely be split */
2219                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2220                         /*g_assert (ins->sreg2 == X86_EAX);
2221                         g_assert (ins->dreg == X86_EAX);*/
2222                         if (ins->sreg2 == X86_EAX) {
2223                                 non_eax_reg = ins->sreg1;
2224                         } else if (ins->sreg1 == X86_EAX) {
2225                                 non_eax_reg = ins->sreg2;
2226                         } else {
2227                                 /* no need to save since we're going to store to it anyway */
2228                                 if (ins->dreg != X86_EAX) {
2229                                         saved_eax = TRUE;
2230                                         x86_push_reg (code, X86_EAX);
2231                                 }
2232                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2233                                 non_eax_reg = ins->sreg2;
2234                         }
2235                         if (ins->dreg == X86_EDX) {
2236                                 if (!saved_eax) {
2237                                         saved_eax = TRUE;
2238                                         x86_push_reg (code, X86_EAX);
2239                                 }
2240                         } else if (ins->dreg != X86_EAX) {
2241                                 saved_edx = TRUE;
2242                                 x86_push_reg (code, X86_EDX);
2243                         }
2244                         x86_mul_reg (code, non_eax_reg, FALSE);
2245                         /* save before the check since pop and mov don't change the flags */
2246                         if (ins->dreg != X86_EAX)
2247                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2248                         if (saved_edx)
2249                                 x86_pop_reg (code, X86_EDX);
2250                         if (saved_eax)
2251                                 x86_pop_reg (code, X86_EAX);
2252                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2253                         break;
2254                 }
2255                 case OP_ICONST:
2256                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2257                         break;
2258                 case OP_AOTCONST:
2259                         g_assert_not_reached ();
2260                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2261                         x86_mov_reg_imm (code, ins->dreg, 0);
2262                         break;
2263                 case OP_LOAD_GOTADDR:
2264                         x86_call_imm (code, 0);
2265                         /* 
2266                          * The patch needs to point to the pop, since the GOT offset needs 
2267                          * to be added to that address.
2268                          */
2269                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2270                         x86_pop_reg (code, ins->dreg);
2271                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2272                         break;
2273                 case OP_GOT_ENTRY:
2274                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2275                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2276                         break;
2277                 case OP_X86_PUSH_GOT_ENTRY:
2278                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2279                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2280                         break;
2281                 case CEE_CONV_I4:
2282                 case OP_MOVE:
2283                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2284                         break;
2285                 case CEE_CONV_U4:
2286                         g_assert_not_reached ();
2287                 case CEE_JMP: {
2288                         /*
2289                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2290                          * Keep in sync with the code in emit_epilog.
2291                          */
2292                         int pos = 0;
2293
2294                         /* FIXME: no tracing support... */
2295                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2296                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2297                         /* reset offset to make max_len work */
2298                         offset = code - cfg->native_code;
2299
2300                         g_assert (!cfg->method->save_lmf);
2301
2302                         code = emit_load_volatile_arguments (cfg, code);
2303
2304                         if (cfg->used_int_regs & (1 << X86_EBX))
2305                                 pos -= 4;
2306                         if (cfg->used_int_regs & (1 << X86_EDI))
2307                                 pos -= 4;
2308                         if (cfg->used_int_regs & (1 << X86_ESI))
2309                                 pos -= 4;
2310                         if (pos)
2311                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2312         
2313                         if (cfg->used_int_regs & (1 << X86_ESI))
2314                                 x86_pop_reg (code, X86_ESI);
2315                         if (cfg->used_int_regs & (1 << X86_EDI))
2316                                 x86_pop_reg (code, X86_EDI);
2317                         if (cfg->used_int_regs & (1 << X86_EBX))
2318                                 x86_pop_reg (code, X86_EBX);
2319         
2320                         /* restore ESP/EBP */
2321                         x86_leave (code);
2322                         offset = code - cfg->native_code;
2323                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2324                         x86_jump32 (code, 0);
2325                         break;
2326                 }
2327                 case OP_CHECK_THIS:
2328                         /* ensure ins->sreg1 is not NULL
2329                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2330                          * cmp DWORD PTR [eax], 0
2331                          */
2332                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2333                         break;
2334                 case OP_ARGLIST: {
2335                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2336                         x86_push_reg (code, hreg);
2337                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2338                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2339                         x86_pop_reg (code, hreg);
2340                         break;
2341                 }
2342                 case OP_FCALL:
2343                 case OP_LCALL:
2344                 case OP_VCALL:
2345                 case OP_VOIDCALL:
2346                 case CEE_CALL:
2347                         call = (MonoCallInst*)ins;
2348                         if (ins->flags & MONO_INST_HAS_METHOD)
2349                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2350                         else
2351                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2352                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2353                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2354                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2355                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2356                                  * smart enough to do that optimization yet
2357                                  *
2358                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2359                          * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2360                          * speedup (most likely from locality benefits). People with other processors should
2361                                  * check on theirs to see what happens.
2362                                  */
2363                                 if (call->stack_usage == 4) {
2364                                         /* we want to use registers that won't get used soon, so use
2365                                          * ecx, as eax will get allocated first. edx is used by long calls,
2366                                          * so we can't use that.
2367                                          */
2368                                         
2369                                         x86_pop_reg (code, X86_ECX);
2370                                 } else {
2371                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2372                                 }
2373                         }
2374                         code = emit_move_return_value (cfg, ins, code);
2375                         break;
2376                 case OP_FCALL_REG:
2377                 case OP_LCALL_REG:
2378                 case OP_VCALL_REG:
2379                 case OP_VOIDCALL_REG:
2380                 case OP_CALL_REG:
2381                         call = (MonoCallInst*)ins;
2382                         x86_call_reg (code, ins->sreg1);
2383                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2384                                 if (call->stack_usage == 4)
2385                                         x86_pop_reg (code, X86_ECX);
2386                                 else
2387                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2388                         }
2389                         code = emit_move_return_value (cfg, ins, code);
2390                         break;
2391                 case OP_FCALL_MEMBASE:
2392                 case OP_LCALL_MEMBASE:
2393                 case OP_VCALL_MEMBASE:
2394                 case OP_VOIDCALL_MEMBASE:
2395                 case OP_CALL_MEMBASE:
2396                         call = (MonoCallInst*)ins;
2397                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2398                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2399                                 if (call->stack_usage == 4)
2400                                         x86_pop_reg (code, X86_ECX);
2401                                 else
2402                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2403                         }
2404                         code = emit_move_return_value (cfg, ins, code);
2405                         break;
2406                 case OP_OUTARG:
2407                 case OP_X86_PUSH:
2408                         x86_push_reg (code, ins->sreg1);
2409                         break;
2410                 case OP_X86_PUSH_IMM:
2411                         x86_push_imm (code, ins->inst_imm);
2412                         break;
2413                 case OP_X86_PUSH_MEMBASE:
2414                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2415                         break;
2416                 case OP_X86_PUSH_OBJ: 
2417                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2418                         x86_push_reg (code, X86_EDI);
2419                         x86_push_reg (code, X86_ESI);
2420                         x86_push_reg (code, X86_ECX);
2421                         if (ins->inst_offset)
2422                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2423                         else
2424                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2425                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2426                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2427                         x86_cld (code);
2428                         x86_prefix (code, X86_REP_PREFIX);
2429                         x86_movsd (code);
2430                         x86_pop_reg (code, X86_ECX);
2431                         x86_pop_reg (code, X86_ESI);
2432                         x86_pop_reg (code, X86_EDI);
2433                         break;
2434                 case OP_X86_LEA:
2435                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2436                         break;
2437                 case OP_X86_LEA_MEMBASE:
2438                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2439                         break;
2440                 case OP_X86_XCHG:
2441                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2442                         break;
2443                 case OP_LOCALLOC:
2444                         /* keep alignment */
2445                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2446                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2447                         code = mono_emit_stack_alloc (code, ins);
2448                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2449                         break;
2450                 case CEE_RET:
2451                         x86_ret (code);
2452                         break;
2453                 case CEE_THROW: {
2454                         x86_push_reg (code, ins->sreg1);
2455                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2456                                                           (gpointer)"mono_arch_throw_exception");
2457                         break;
2458                 }
2459                 case OP_RETHROW: {
2460                         x86_push_reg (code, ins->sreg1);
2461                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2462                                                           (gpointer)"mono_arch_rethrow_exception");
2463                         break;
2464                 }
2465                 case OP_CALL_HANDLER: 
2466                         /* Align stack */
2467 #ifdef __APPLE__
2468                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2469 #endif
2470                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2471                         x86_call_imm (code, 0);
2472 #ifdef __APPLE__
2473                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2474 #endif
2475                         break;
2476                 case OP_LABEL:
2477                         ins->inst_c0 = code - cfg->native_code;
2478                         break;
2479                 case CEE_BR:
2480                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2481                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2482                         //break;
2483                         if (ins->flags & MONO_INST_BRLABEL) {
2484                                 if (ins->inst_i0->inst_c0) {
2485                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2486                                 } else {
2487                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2488                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2489                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2490                                                 x86_jump8 (code, 0);
2491                                         else 
2492                                                 x86_jump32 (code, 0);
2493                                 }
2494                         } else {
2495                                 if (ins->inst_target_bb->native_offset) {
2496                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2497                                 } else {
2498                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2499                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2500                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2501                                                 x86_jump8 (code, 0);
2502                                         else 
2503                                                 x86_jump32 (code, 0);
2504                                 } 
2505                         }
2506                         break;
2507                 case OP_BR_REG:
2508                         x86_jump_reg (code, ins->sreg1);
2509                         break;
2510                 case OP_CEQ:
2511                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2512                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2513                         break;
2514                 case OP_CLT:
2515                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2516                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2517                         break;
2518                 case OP_CLT_UN:
2519                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2520                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2521                         break;
2522                 case OP_CGT:
2523                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2524                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2525                         break;
2526                 case OP_CGT_UN:
2527                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2528                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2529                         break;
2530                 case OP_CNE:
2531                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2532                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2533                         break;
2534                 case OP_COND_EXC_EQ:
2535                 case OP_COND_EXC_NE_UN:
2536                 case OP_COND_EXC_LT:
2537                 case OP_COND_EXC_LT_UN:
2538                 case OP_COND_EXC_GT:
2539                 case OP_COND_EXC_GT_UN:
2540                 case OP_COND_EXC_GE:
2541                 case OP_COND_EXC_GE_UN:
2542                 case OP_COND_EXC_LE:
2543                 case OP_COND_EXC_LE_UN:
2544                 case OP_COND_EXC_OV:
2545                 case OP_COND_EXC_NO:
2546                 case OP_COND_EXC_C:
2547                 case OP_COND_EXC_NC:
2548                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2549                         break;
2550                 case CEE_BEQ:
2551                 case CEE_BNE_UN:
2552                 case CEE_BLT:
2553                 case CEE_BLT_UN:
2554                 case CEE_BGT:
2555                 case CEE_BGT_UN:
2556                 case CEE_BGE:
2557                 case CEE_BGE_UN:
2558                 case CEE_BLE:
2559                 case CEE_BLE_UN:
2560                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2561                         break;
2562
2563                 /* floating point opcodes */
2564                 case OP_R8CONST: {
2565                         double d = *(double *)ins->inst_p0;
2566
2567                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2568                                 x86_fldz (code);
2569                         } else if (d == 1.0) {
2570                                 x86_fld1 (code);
2571                         } else {
2572                                 if (cfg->compile_aot) {
2573                                         guint32 *val = (guint32*)&d;
2574                                         x86_push_imm (code, val [1]);
2575                                         x86_push_imm (code, val [0]);
2576                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2577                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2578                                 }
2579                                 else {
2580                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2581                                         x86_fld (code, NULL, TRUE);
2582                                 }
2583                         }
2584                         break;
2585                 }
2586                 case OP_R4CONST: {
2587                         float f = *(float *)ins->inst_p0;
2588
2589                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2590                                 x86_fldz (code);
2591                         } else if (f == 1.0) {
2592                                 x86_fld1 (code);
2593                         } else {
2594                                 if (cfg->compile_aot) {
2595                                         guint32 val = *(guint32*)&f;
2596                                         x86_push_imm (code, val);
2597                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2598                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2599                                 }
2600                                 else {
2601                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2602                                         x86_fld (code, NULL, FALSE);
2603                                 }
2604                         }
2605                         break;
2606                 }
2607                 case OP_STORER8_MEMBASE_REG:
2608                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2609                         break;
2610                 case OP_LOADR8_SPILL_MEMBASE:
2611                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2612                         x86_fxch (code, 1);
2613                         break;
2614                 case OP_LOADR8_MEMBASE:
2615                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2616                         break;
2617                 case OP_STORER4_MEMBASE_REG:
2618                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2619                         break;
2620                 case OP_LOADR4_MEMBASE:
2621                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2622                         break;
2623                 case CEE_CONV_R4: /* FIXME: change precision */
2624                 case CEE_CONV_R8:
2625                         x86_push_reg (code, ins->sreg1);
2626                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2627                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2628                         break;
2629                 case OP_X86_FP_LOAD_I8:
2630                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2631                         break;
2632                 case OP_X86_FP_LOAD_I4:
2633                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2634                         break;
2635                 case OP_FCONV_TO_I1:
2636                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2637                         break;
2638                 case OP_FCONV_TO_U1:
2639                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2640                         break;
2641                 case OP_FCONV_TO_I2:
2642                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2643                         break;
2644                 case OP_FCONV_TO_U2:
2645                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2646                         break;
2647                 case OP_FCONV_TO_I4:
2648                 case OP_FCONV_TO_I:
2649                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2650                         break;
2651                 case OP_FCONV_TO_I8:
2652                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2653                         x86_fnstcw_membase(code, X86_ESP, 0);
2654                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2655                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2656                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2657                         x86_fldcw_membase (code, X86_ESP, 2);
2658                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2659                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2660                         x86_pop_reg (code, ins->dreg);
2661                         x86_pop_reg (code, ins->backend.reg3);
2662                         x86_fldcw_membase (code, X86_ESP, 0);
2663                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2664                         break;
2665                 case OP_LCONV_TO_R_UN: { 
2666                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2667                         guint8 *br;
2668
2669                         /* load 64bit integer to FP stack */
2670                         x86_push_imm (code, 0);
2671                         x86_push_reg (code, ins->sreg2);
2672                         x86_push_reg (code, ins->sreg1);
2673                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2674                         /* store as 80bit FP value */
2675                         x86_fst80_membase (code, X86_ESP, 0);
2676                         
2677                         /* test if lreg is negative */
2678                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2679                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2680         
2681                         /* add correction constant mn */
2682                         x86_fld80_mem (code, mn);
2683                         x86_fld80_membase (code, X86_ESP, 0);
2684                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2685                         x86_fst80_membase (code, X86_ESP, 0);
2686
2687                         x86_patch (br, code);
2688
2689                         x86_fld80_membase (code, X86_ESP, 0);
2690                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2691
2692                         break;
2693                 }
2694                 case OP_LCONV_TO_OVF_I: {
2695                         guint8 *br [3], *label [1];
2696                         MonoInst *tins;
2697
2698                         /* 
2699                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2700                          */
2701                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2702
2703                         /* If the low word top bit is set, see if we are negative */
2704                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2705                         /* We are not negative (no top bit set); check that our top word is zero */
2706                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2707                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2708                         label [0] = code;
2709
2710                         /* throw exception */
2711                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2712                         if (tins) {
2713                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2714                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2715                                         x86_jump8 (code, 0);
2716                                 else
2717                                         x86_jump32 (code, 0);
2718                         } else {
2719                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2720                                 x86_jump32 (code, 0);
2721                         }
2722         
2723         
2724                         x86_patch (br [0], code);
2725                         /* our top bit is set, check that top word is 0xfffffff */
2726                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2727                 
2728                         x86_patch (br [1], code);
2729                         /* nope, emit exception */
2730                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2731                         x86_patch (br [2], label [0]);
2732
2733                         if (ins->dreg != ins->sreg1)
2734                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2735                         break;
2736                 }
2737                 case OP_FADD:
2738                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2739                         break;
2740                 case OP_FSUB:
2741                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2742                         break;          
2743                 case OP_FMUL:
2744                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2745                         break;          
2746                 case OP_FDIV:
2747                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2748                         break;          
2749                 case OP_FNEG:
2750                         x86_fchs (code);
2751                         break;          
2752                 case OP_SIN:
2753                         x86_fsin (code);
2754                         x86_fldz (code);
2755                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2756                         break;          
2757                 case OP_COS:
2758                         x86_fcos (code);
2759                         x86_fldz (code);
2760                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2761                         break;          
2762                 case OP_ABS:
2763                         x86_fabs (code);
2764                         break;          
2765                 case OP_TAN: {
2766                         /* 
2767                          * it really doesn't make sense to inline all this code,
2768                          * it's here just to show that things may not be as simple 
2769                          * as they appear.
2770                          */
2771                         guchar *check_pos, *end_tan, *pop_jump;
2772                         x86_push_reg (code, X86_EAX);
2773                         x86_fptan (code);
2774                         x86_fnstsw (code);
2775                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2776                         check_pos = code;
2777                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2778                         x86_fstp (code, 0); /* pop the 1.0 */
2779                         end_tan = code;
2780                         x86_jump8 (code, 0);
2781                         x86_fldpi (code);
2782                         x86_fp_op (code, X86_FADD, 0);
2783                         x86_fxch (code, 1);
2784                         x86_fprem1 (code);
2785                         x86_fstsw (code);
2786                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2787                         pop_jump = code;
2788                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2789                         x86_fstp (code, 1);
2790                         x86_fptan (code);
2791                         x86_patch (pop_jump, code);
2792                         x86_fstp (code, 0); /* pop the 1.0 */
2793                         x86_patch (check_pos, code);
2794                         x86_patch (end_tan, code);
2795                         x86_fldz (code);
2796                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2797                         x86_pop_reg (code, X86_EAX);
2798                         break;
2799                 }
2800                 case OP_ATAN:
2801                         x86_fld1 (code);
2802                         x86_fpatan (code);
2803                         x86_fldz (code);
2804                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2805                         break;          
2806                 case OP_SQRT:
2807                         x86_fsqrt (code);
2808                         break;          
2809                 case OP_X86_FPOP:
2810                         x86_fstp (code, 0);
2811                         break;          
2812                 case OP_FREM: {
2813                         guint8 *l1, *l2;
2814
2815                         x86_push_reg (code, X86_EAX);
2816                         /* we need to exchange ST(0) with ST(1) */
2817                         x86_fxch (code, 1);
2818
2819                         /* this requires a loop, because fprem somtimes 
2820                          * returns a partial remainder */
2821                         l1 = code;
2822                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2823                         /* x86_fprem1 (code); */
2824                         x86_fprem (code);
2825                         x86_fnstsw (code);
2826                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2827                         l2 = code + 2;
2828                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2829
2830                         /* pop result */
2831                         x86_fstp (code, 1);
2832
2833                         x86_pop_reg (code, X86_EAX);
2834                         break;
2835                 }
2836                 case OP_FCOMPARE:
2837                         if (cfg->opt & MONO_OPT_FCMOV) {
2838                                 x86_fcomip (code, 1);
2839                                 x86_fstp (code, 0);
2840                                 break;
2841                         }
2842                         /* this overwrites EAX */
2843                         EMIT_FPCOMPARE(code);
2844                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2845                         break;
2846                 case OP_FCEQ:
2847                         if (cfg->opt & MONO_OPT_FCMOV) {
2848                                 /* zeroing the register at the start results in 
2849                                  * shorter and faster code (we can also remove the widening op)
2850                                  */
2851                                 guchar *unordered_check;
2852                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2853                                 x86_fcomip (code, 1);
2854                                 x86_fstp (code, 0);
2855                                 unordered_check = code;
2856                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2857                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2858                                 x86_patch (unordered_check, code);
2859                                 break;
2860                         }
2861                         if (ins->dreg != X86_EAX) 
2862                                 x86_push_reg (code, X86_EAX);
2863
2864                         EMIT_FPCOMPARE(code);
2865                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2866                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2867                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2868                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2869
2870                         if (ins->dreg != X86_EAX) 
2871                                 x86_pop_reg (code, X86_EAX);
2872                         break;
2873                 case OP_FCLT:
2874                 case OP_FCLT_UN:
2875                         if (cfg->opt & MONO_OPT_FCMOV) {
2876                                 /* zeroing the register at the start results in 
2877                                  * shorter and faster code (we can also remove the widening op)
2878                                  */
2879                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2880                                 x86_fcomip (code, 1);
2881                                 x86_fstp (code, 0);
2882                                 if (ins->opcode == OP_FCLT_UN) {
2883                                         guchar *unordered_check = code;
2884                                         guchar *jump_to_end;
2885                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2886                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2887                                         jump_to_end = code;
2888                                         x86_jump8 (code, 0);
2889                                         x86_patch (unordered_check, code);
2890                                         x86_inc_reg (code, ins->dreg);
2891                                         x86_patch (jump_to_end, code);
2892                                 } else {
2893                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2894                                 }
2895                                 break;
2896                         }
2897                         if (ins->dreg != X86_EAX) 
2898                                 x86_push_reg (code, X86_EAX);
2899
2900                         EMIT_FPCOMPARE(code);
2901                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2902                         if (ins->opcode == OP_FCLT_UN) {
2903                                 guchar *is_not_zero_check, *end_jump;
2904                                 is_not_zero_check = code;
2905                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2906                                 end_jump = code;
2907                                 x86_jump8 (code, 0);
2908                                 x86_patch (is_not_zero_check, code);
2909                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2910
2911                                 x86_patch (end_jump, code);
2912                         }
2913                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2914                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2915
2916                         if (ins->dreg != X86_EAX) 
2917                                 x86_pop_reg (code, X86_EAX);
2918                         break;
2919                 case OP_FCGT:
2920                 case OP_FCGT_UN:
2921                         if (cfg->opt & MONO_OPT_FCMOV) {
2922                                 /* zeroing the register at the start results in 
2923                                  * shorter and faster code (we can also remove the widening op)
2924                                  */
2925                                 guchar *unordered_check;
2926                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2927                                 x86_fcomip (code, 1);
2928                                 x86_fstp (code, 0);
2929                                 if (ins->opcode == OP_FCGT) {
2930                                         unordered_check = code;
2931                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2932                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2933                                         x86_patch (unordered_check, code);
2934                                 } else {
2935                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2936                                 }
2937                                 break;
2938                         }
2939                         if (ins->dreg != X86_EAX) 
2940                                 x86_push_reg (code, X86_EAX);
2941
2942                         EMIT_FPCOMPARE(code);
2943                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2944                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2945                         if (ins->opcode == OP_FCGT_UN) {
2946                                 guchar *is_not_zero_check, *end_jump;
2947                                 is_not_zero_check = code;
2948                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2949                                 end_jump = code;
2950                                 x86_jump8 (code, 0);
2951                                 x86_patch (is_not_zero_check, code);
2952                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2953         
2954                                 x86_patch (end_jump, code);
2955                         }
2956                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2957                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2958
2959                         if (ins->dreg != X86_EAX) 
2960                                 x86_pop_reg (code, X86_EAX);
2961                         break;
2962                 case OP_FBEQ:
2963                         if (cfg->opt & MONO_OPT_FCMOV) {
2964                                 guchar *jump = code;
2965                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2966                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2967                                 x86_patch (jump, code);
2968                                 break;
2969                         }
2970                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2971                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2972                         break;
2973                 case OP_FBNE_UN:
2974                         /* Branch if C013 != 100 */
2975                         if (cfg->opt & MONO_OPT_FCMOV) {
2976                                 /* branch if !ZF or (PF|CF) */
2977                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2978                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2979                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2980                                 break;
2981                         }
2982                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2983                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2984                         break;
2985                 case OP_FBLT:
2986                         if (cfg->opt & MONO_OPT_FCMOV) {
2987                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2988                                 break;
2989                         }
2990                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2991                         break;
2992                 case OP_FBLT_UN:
2993                         if (cfg->opt & MONO_OPT_FCMOV) {
2994                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2995                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2996                                 break;
2997                         }
2998                         if (ins->opcode == OP_FBLT_UN) {
2999                                 guchar *is_not_zero_check, *end_jump;
3000                                 is_not_zero_check = code;
3001                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3002                                 end_jump = code;
3003                                 x86_jump8 (code, 0);
3004                                 x86_patch (is_not_zero_check, code);
3005                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3006
3007                                 x86_patch (end_jump, code);
3008                         }
3009                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3010                         break;
3011                 case OP_FBGT:
3012                 case OP_FBGT_UN:
3013                         if (cfg->opt & MONO_OPT_FCMOV) {
3014                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3015                                 break;
3016                         }
3017                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3018                         if (ins->opcode == OP_FBGT_UN) {
3019                                 guchar *is_not_zero_check, *end_jump;
3020                                 is_not_zero_check = code;
3021                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3022                                 end_jump = code;
3023                                 x86_jump8 (code, 0);
3024                                 x86_patch (is_not_zero_check, code);
3025                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3026
3027                                 x86_patch (end_jump, code);
3028                         }
3029                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3030                         break;
3031                 case OP_FBGE:
3032                         /* Branch if C013 == 100 or 001 */
3033                         if (cfg->opt & MONO_OPT_FCMOV) {
3034                                 guchar *br1;
3035
3036                                 /* skip branch if C1=1 */
3037                                 br1 = code;
3038                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3039                                 /* branch if (C0 | C3) = 1 */
3040                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3041                                 x86_patch (br1, code);
3042                                 break;
3043                         }
3044                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3045                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3046                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3047                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3048                         break;
3049                 case OP_FBGE_UN:
3050                         /* Branch if C013 == 000 */
3051                         if (cfg->opt & MONO_OPT_FCMOV) {
3052                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3053                                 break;
3054                         }
3055                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3056                         break;
3057                 case OP_FBLE:
3058                         /* Branch if C013=000 or 100 */
3059                         if (cfg->opt & MONO_OPT_FCMOV) {
3060                                 guchar *br1;
3061
3062                                 /* skip branch if C1=1 */
3063                                 br1 = code;
3064                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3065                                 /* branch if C0=0 */
3066                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3067                                 x86_patch (br1, code);
3068                                 break;
3069                         }
3070                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3071                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3072                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3073                         break;
3074                 case OP_FBLE_UN:
3075                         /* Branch if C013 != 001 */
3076                         if (cfg->opt & MONO_OPT_FCMOV) {
3077                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3078                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3079                                 break;
3080                         }
3081                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3082                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3083                         break;
3084                 case CEE_CKFINITE: {
3085                         x86_push_reg (code, X86_EAX);
3086                         x86_fxam (code);
3087                         x86_fnstsw (code);
3088                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3089                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3090                         x86_pop_reg (code, X86_EAX);
3091                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3092                         break;
3093                 }
3094                 case OP_TLS_GET: {
3095                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3096                         break;
3097                 }
3098                 case OP_MEMORY_BARRIER: {
3099                         /* Not needed on x86 */
3100                         break;
3101                 }
3102                 case OP_ATOMIC_ADD_I4: {
3103                         int dreg = ins->dreg;
3104
3105                         if (dreg == ins->inst_basereg) {
3106                                 x86_push_reg (code, ins->sreg2);
3107                                 dreg = ins->sreg2;
3108                         } 
3109                         
3110                         if (dreg != ins->sreg2)
3111                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3112
3113                         x86_prefix (code, X86_LOCK_PREFIX);
3114                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3115
3116                         if (dreg != ins->dreg) {
3117                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3118                                 x86_pop_reg (code, dreg);
3119                         }
3120
3121                         break;
3122                 }
3123                 case OP_ATOMIC_ADD_NEW_I4: {
3124                         int dreg = ins->dreg;
3125
3126                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3127                         if (ins->sreg2 == dreg) {
3128                                 if (dreg == X86_EBX) {
3129                                         dreg = X86_EDI;
3130                                         if (ins->inst_basereg == X86_EDI)
3131                                                 dreg = X86_ESI;
3132                                 } else {
3133                                         dreg = X86_EBX;
3134                                         if (ins->inst_basereg == X86_EBX)
3135                                                 dreg = X86_EDI;
3136                                 }
3137                         } else if (ins->inst_basereg == dreg) {
3138                                 if (dreg == X86_EBX) {
3139                                         dreg = X86_EDI;
3140                                         if (ins->sreg2 == X86_EDI)
3141                                                 dreg = X86_ESI;
3142                                 } else {
3143                                         dreg = X86_EBX;
3144                                         if (ins->sreg2 == X86_EBX)
3145                                                 dreg = X86_EDI;
3146                                 }
3147                         }
3148
3149                         if (dreg != ins->dreg) {
3150                                 x86_push_reg (code, dreg);
3151                         }
3152
3153                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3154                         x86_prefix (code, X86_LOCK_PREFIX);
3155                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3156                         /* dreg contains the old value, add with sreg2 value */
3157                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3158                         
3159                         if (ins->dreg != dreg) {
3160                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3161                                 x86_pop_reg (code, dreg);
3162                         }
3163
3164                         break;
3165                 }
3166                 case OP_ATOMIC_EXCHANGE_I4: {
3167                         guchar *br[2];
3168                         int sreg2 = ins->sreg2;
3169                         int breg = ins->inst_basereg;
3170
3171                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3172                          * hack to overcome limits in x86 reg allocator 
3173                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3174                          */
3175                         if (ins->dreg != X86_EAX)
3176                                 x86_push_reg (code, X86_EAX);
3177                         
3178                         /* We need the EAX reg for the cmpxchg */
3179                         if (ins->sreg2 == X86_EAX) {
3180                                 x86_push_reg (code, X86_EDX);
3181                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3182                                 sreg2 = X86_EDX;
3183                         }
3184
3185                         if (breg == X86_EAX) {
3186                                 x86_push_reg (code, X86_ESI);
3187                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3188                                 breg = X86_ESI;
3189                         }
3190
3191                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3192
3193                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3194                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3195                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3196                         x86_patch (br [1], br [0]);
3197
3198                         if (breg != ins->inst_basereg)
3199                                 x86_pop_reg (code, X86_ESI);
3200
3201                         if (ins->dreg != X86_EAX) {
3202                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3203                                 x86_pop_reg (code, X86_EAX);
3204                         }
3205
3206                         if (ins->sreg2 != sreg2)
3207                                 x86_pop_reg (code, X86_EDX);
3208
3209                         break;
3210                 }
3211                 default:
3212                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3213                         g_assert_not_reached ();
3214                 }
3215
3216                 if ((code - cfg->native_code - offset) > max_len) {
3217                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3218                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3219                         g_assert_not_reached ();
3220                 }
3221                
3222                 cpos += max_len;
3223
3224                 last_ins = ins;
3225                 last_offset = offset;
3226                 
3227                 ins = ins->next;
3228         }
3229
3230         cfg->code_len = code - cfg->native_code;
3231 }
3232
/*
 * mono_arch_register_lowlevel_calls:
 *
 * Register arch-specific low-level runtime helpers with the JIT.
 * The x86 backend has none to register, so this is intentionally empty.
 */
void
mono_arch_register_lowlevel_calls (void)
{
}
3237
3238 void
3239 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3240 {
3241         MonoJumpInfo *patch_info;
3242         gboolean compile_aot = !run_cctors;
3243
3244         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3245                 unsigned char *ip = patch_info->ip.i + code;
3246                 const unsigned char *target;
3247
3248                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3249
3250                 if (compile_aot) {
3251                         switch (patch_info->type) {
3252                         case MONO_PATCH_INFO_BB:
3253                         case MONO_PATCH_INFO_LABEL:
3254                                 break;
3255                         default:
3256                                 /* No need to patch these */
3257                                 continue;
3258                         }
3259                 }
3260
3261                 switch (patch_info->type) {
3262                 case MONO_PATCH_INFO_IP:
3263                         *((gconstpointer *)(ip)) = target;
3264                         break;
3265                 case MONO_PATCH_INFO_CLASS_INIT: {
3266                         guint8 *code = ip;
3267                         /* Might already been changed to a nop */
3268                         x86_call_code (code, 0);
3269                         x86_patch (ip, target);
3270                         break;
3271                 }
3272                 case MONO_PATCH_INFO_ABS:
3273                 case MONO_PATCH_INFO_METHOD:
3274                 case MONO_PATCH_INFO_METHOD_JUMP:
3275                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3276                 case MONO_PATCH_INFO_BB:
3277                 case MONO_PATCH_INFO_LABEL:
3278                         x86_patch (ip, target);
3279                         break;
3280                 case MONO_PATCH_INFO_NONE:
3281                         break;
3282                 default: {
3283                         guint32 offset = mono_arch_get_patch_offset (ip);
3284                         *((gconstpointer *)(ip + offset)) = target;
3285                         break;
3286                 }
3287                 }
3288         }
3289 }
3290
3291 guint8 *
3292 mono_arch_emit_prolog (MonoCompile *cfg)
3293 {
3294         MonoMethod *method = cfg->method;
3295         MonoBasicBlock *bb;
3296         MonoMethodSignature *sig;
3297         MonoInst *inst;
3298         int alloc_size, pos, max_offset, i;
3299         guint8 *code;
3300
3301         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3302         code = cfg->native_code = g_malloc (cfg->code_size);
3303
3304         x86_push_reg (code, X86_EBP);
3305         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3306
3307         alloc_size = cfg->stack_offset;
3308         pos = 0;
3309
3310         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3311                 /* Might need to attach the thread to the JIT */
3312                 if (lmf_tls_offset != -1) {
3313                         guint8 *buf;
3314
3315                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3316                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3317                         buf = code;
3318                         x86_branch8 (code, X86_CC_NE, 0, 0);
3319                         x86_push_imm (code, cfg->domain);
3320                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3321                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3322                         x86_patch (buf, code);
3323 #ifdef PLATFORM_WIN32
3324                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3325                         /* FIXME: Add a separate key for LMF to avoid this */
3326                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3327 #endif
3328                 } else {
3329                         g_assert (!cfg->compile_aot);
3330                         x86_push_imm (code, cfg->domain);
3331                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3332                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3333                 }
3334         }
3335
3336         if (method->save_lmf) {
3337                 pos += sizeof (MonoLMF);
3338
3339                 /* save the current IP */
3340                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3341                 x86_push_imm_template (code);
3342
3343                 /* save all caller saved regs */
3344                 x86_push_reg (code, X86_EBP);
3345                 x86_push_reg (code, X86_ESI);
3346                 x86_push_reg (code, X86_EDI);
3347                 x86_push_reg (code, X86_EBX);
3348
3349                 /* save method info */
3350                 x86_push_imm (code, method);
3351
3352                 /* get the address of lmf for the current thread */
3353                 /* 
3354                  * This is performance critical so we try to use some tricks to make
3355                  * it fast.
3356                  */
3357                 if (lmf_tls_offset != -1) {
3358                         /* Load lmf quicky using the GS register */
3359                         code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
3360 #ifdef PLATFORM_WIN32
3361                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3362                         /* FIXME: Add a separate key for LMF to avoid this */
3363                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3364 #endif
3365                 } else {
3366                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3367                 }
3368
3369                 /* push lmf */
3370                 x86_push_reg (code, X86_EAX); 
3371                 /* push *lfm (previous_lmf) */
3372                 x86_push_membase (code, X86_EAX, 0);
3373                 /* *(lmf) = ESP */
3374                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3375         } else {
3376
3377                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3378                         x86_push_reg (code, X86_EBX);
3379                         pos += 4;
3380                 }
3381
3382                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3383                         x86_push_reg (code, X86_EDI);
3384                         pos += 4;
3385                 }
3386
3387                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3388                         x86_push_reg (code, X86_ESI);
3389                         pos += 4;
3390                 }
3391         }
3392
3393         alloc_size -= pos;
3394
3395 #if __APPLE__
3396         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3397         {
3398                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3399                 if (tot & 4) {
3400                         tot += 4;
3401                         alloc_size += 4;
3402                 }
3403                 if (tot & 8) {
3404                         alloc_size += 8;
3405                 }
3406         }
3407 #endif
3408
3409         if (alloc_size) {
3410                 /* See mono_emit_stack_alloc */
3411 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3412                 guint32 remaining_size = alloc_size;
3413                 while (remaining_size >= 0x1000) {
3414                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3415                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3416                         remaining_size -= 0x1000;
3417                 }
3418                 if (remaining_size)
3419                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3420 #else
3421                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3422 #endif
3423         }
3424
3425 #if __APPLE_
3426         /* check the stack is aligned */
3427         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3428         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3429         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3430         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3431         x86_breakpoint (code);
3432 #endif
3433
3434         /* compute max_offset in order to use short forward jumps */
3435         max_offset = 0;
3436         if (cfg->opt & MONO_OPT_BRANCH) {
3437                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3438                         MonoInst *ins = bb->code;
3439                         bb->max_offset = max_offset;
3440
3441                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3442                                 max_offset += 6;
3443                         /* max alignment for loops */
3444                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3445                                 max_offset += LOOP_ALIGNMENT;
3446
3447                         while (ins) {
3448                                 if (ins->opcode == OP_LABEL)
3449                                         ins->inst_c1 = max_offset;
3450                                 
3451                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3452                                 ins = ins->next;
3453                         }
3454                 }
3455         }
3456
3457         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3458                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3459
3460         /* load arguments allocated to register from the stack */
3461         sig = mono_method_signature (method);
3462         pos = 0;
3463
3464         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3465                 inst = cfg->varinfo [pos];
3466                 if (inst->opcode == OP_REGVAR) {
3467                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3468                         if (cfg->verbose_level > 2)
3469                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3470                 }
3471                 pos++;
3472         }
3473
3474         cfg->code_len = code - cfg->native_code;
3475
3476         return code;
3477 }
3478
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the method epilog: restore the callee-saved registers, unlink the
 * LMF if the prolog pushed one, load valuetype return values into their
 * registers, and emit the final leave/ret.  The register restoring emitted
 * here must be kept in sync with the CEE_JMP implementation.
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
        MonoMethod *method = cfg->method;
        MonoMethodSignature *sig = mono_method_signature (method);
        int quad, pos;
        guint32 stack_to_pop;
        guint8 *code;
        int max_epilog_size = 16;
        CallInfo *cinfo;
        
        if (cfg->method->save_lmf)
                max_epilog_size += 128;
        
        if (mono_jit_trace_calls != NULL)
                max_epilog_size += 50;

        /* Grow the native code buffer until the epilog is guaranteed to fit */
        while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
                cfg->code_size *= 2;
                cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
                mono_jit_stats.code_reallocs++;
        }

        code = cfg->native_code + cfg->code_len;

        if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
                code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

        /* the code restoring the registers must be kept in sync with CEE_JMP */
        pos = 0;
        
        if (method->save_lmf) {
                gint32 prev_lmf_reg;
                /* The LMF sits at the top of the frame, just below EBP */
                gint32 lmf_offset = -sizeof (MonoLMF);

                /* Find a spare register (one not holding part of the return value) */
                switch (sig->ret->type) {
                case MONO_TYPE_I8:
                case MONO_TYPE_U8:
                        /* EAX:EDX hold the 64 bit result, so use EDI instead */
                        prev_lmf_reg = X86_EDI;
                        cfg->used_int_regs |= (1 << X86_EDI);
                        break;
                default:
                        prev_lmf_reg = X86_EDX;
                        break;
                }

                /* reg = previous_lmf */
                x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

                /* ecx = lmf */
                x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);

                /* *(lmf) = previous_lmf, unlinking this frame from the LMF chain */
                x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);

                /* restore caller saved regs */
                if (cfg->used_int_regs & (1 << X86_EBX)) {
                        x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
                }

                if (cfg->used_int_regs & (1 << X86_EDI)) {
                        x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
                }
                if (cfg->used_int_regs & (1 << X86_ESI)) {
                        x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
                }

                /* EBP is restored by LEAVE */
        } else {
                /* Compute the offset of the lowest saved register below EBP */
                if (cfg->used_int_regs & (1 << X86_EBX)) {
                        pos -= 4;
                }
                if (cfg->used_int_regs & (1 << X86_EDI)) {
                        pos -= 4;
                }
                if (cfg->used_int_regs & (1 << X86_ESI)) {
                        pos -= 4;
                }

                if (pos)
                        x86_lea_membase (code, X86_ESP, X86_EBP, pos);

                /* Pop in reverse order of the pushes done in the prolog */
                if (cfg->used_int_regs & (1 << X86_ESI)) {
                        x86_pop_reg (code, X86_ESI);
                }
                if (cfg->used_int_regs & (1 << X86_EDI)) {
                        x86_pop_reg (code, X86_EDI);
                }
                if (cfg->used_int_regs & (1 << X86_EBX)) {
                        x86_pop_reg (code, X86_EBX);
                }
        }

        /* Load returned vtypes into registers if needed */
        cinfo = get_call_info (sig, FALSE);
        if (cinfo->ret.storage == ArgValuetypeInReg) {
                for (quad = 0; quad < 2; quad ++) {
                        switch (cinfo->ret.pair_storage [quad]) {
                        case ArgInIReg:
                                x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
                                break;
                        case ArgOnFloatFpStack:
                                x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
                                break;
                        case ArgOnDoubleFpStack:
                                x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
                                break;
                        case ArgNone:
                                break;
                        default:
                                g_assert_not_reached ();
                        }
                }
        }

        x86_leave (code);

        if (CALLCONV_IS_STDCALL (sig)) {
                /* stdcall: the callee pops its own arguments */
                MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

                stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
        } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
                /* Pop the hidden valuetype return address pushed by the caller */
                stack_to_pop = 4;
        else
                stack_to_pop = 0;

        if (stack_to_pop)
                x86_ret_imm (code, stack_to_pop);
        else
                x86_ret (code);

        g_free (cinfo);

        cfg->code_len = code - cfg->native_code;

        g_assert (cfg->code_len < cfg->code_size);
}
3617
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out-of-line exception throwing sequences referenced by
 * MONO_PATCH_INFO_EXC patches.  Throw sequences for the same exception
 * class are shared between call sites: later sites just push their IP
 * offset and jump to the first sequence.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
        MonoJumpInfo *patch_info;
        int nthrows, i;
        guint8 *code;
        /* Caches for sharing throw sequences; only the first 16 classes are cached */
        MonoClass *exc_classes [16];
        guint8 *exc_throw_start [16], *exc_throw_end [16];
        guint32 code_size;
        int exc_count = 0;

        /* Compute needed space */
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                if (patch_info->type == MONO_PATCH_INFO_EXC)
                        exc_count++;
        }

        /* 
         * make sure we have enough space for exceptions
         * 16 is the size of two push_imm instructions and a call
         */
        if (cfg->compile_aot)
                code_size = exc_count * 32;
        else
                code_size = exc_count * 16;

        while (cfg->code_len + code_size > (cfg->code_size - 16)) {
                cfg->code_size *= 2;
                cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
                mono_jit_stats.code_reallocs++;
        }

        code = cfg->native_code + cfg->code_len;

        nthrows = 0;
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                switch (patch_info->type) {
                case MONO_PATCH_INFO_EXC: {
                        MonoClass *exc_class;
                        guint8 *buf, *buf2;
                        guint32 throw_ip;

                        /* Redirect the branch at the throw site to this sequence */
                        x86_patch (patch_info->ip.i + cfg->native_code, code);

                        exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
                        g_assert (exc_class);
                        throw_ip = patch_info->ip.i;

                        /* Find a throw sequence for the same exception class */
                        for (i = 0; i < nthrows; ++i)
                                if (exc_classes [i] == exc_class)
                                        break;
                        if (i < nthrows) {
                                /* Reuse the existing sequence; only the IP offset differs */
                                x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
                                x86_jump_code (code, exc_throw_start [i]);
                                patch_info->type = MONO_PATCH_INFO_NONE;
                        }
                        else {
                                guint32 size;

                                /* Compute size of code following the push <OFFSET> */
                                size = 5 + 5;

                                if ((code - cfg->native_code) - throw_ip < 126 - size) {
                                        /* Use the shorter form */
                                        buf = buf2 = code;
                                        x86_push_imm (code, 0);
                                }
                                else {
                                        /* Emit a 32 bit push now; the real offset is backpatched below */
                                        buf = code;
                                        x86_push_imm (code, 0xf0f0f0f0);
                                        buf2 = code;
                                }

                                if (nthrows < 16) {
                                        exc_classes [nthrows] = exc_class;
                                        exc_throw_start [nthrows] = code;
                                }

                                x86_push_imm (code, exc_class->type_token);
                                /* Repurpose this patch entry for the call to the throw helper */
                                patch_info->data.name = "mono_arch_throw_corlib_exception";
                                patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
                                patch_info->ip.i = code - cfg->native_code;
                                x86_call_code (code, 0);
                                /* Backpatch the IP offset pushed above; pad with nops if it was shorter */
                                x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
                                while (buf < buf2)
                                        x86_nop (buf);

                                if (nthrows < 16) {
                                        exc_throw_end [nthrows] = code;
                                        nthrows ++;
                                }
                        }
                        break;
                }
                default:
                        /* do nothing */
                        break;
                }
        }

        cfg->code_len = code - cfg->native_code;

        g_assert (cfg->code_len < cfg->code_size);
}
3723
3724 void
3725 mono_arch_flush_icache (guint8 *code, gint size)
3726 {
3727         /* not needed */
3728 }
3729
void
mono_arch_flush_register_windows (void)
{
        /* No-op: x86 has no register windows (a SPARC-style concept). */
}
3734
/*
 * Support for fast access to the thread-local lmf structure using the GS
 * segment register on NPTL + kernel 2.6.x.
 */

/* Set once the TLS offsets below have been successfully cached */
static gboolean tls_offset_inited = FALSE;

/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Cache the TLS offsets/keys (appdomain, lmf, thread) used to emit inline
 * TLS accesses.  Setting the MONO_NO_TLS environment variable disables the
 * fast path entirely, leaving all offsets at -1.
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
        if (!tls_offset_inited) {
                if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
                        /* 
                         * We need to init this multiple times, since when we are first called, the key might not
                         * be initialized yet.
                         */
                        /* NOTE: tls_offset_inited is deliberately left FALSE here for that reason */
                        appdomain_tls_offset = mono_domain_get_tls_key ();
                        lmf_tls_offset = mono_get_jit_tls_key ();
                        thread_tls_offset = mono_thread_get_tls_key ();

                        /* Only 64 tls entries can be accessed using inline code */
                        if (appdomain_tls_offset >= 64)
                                appdomain_tls_offset = -1;
                        if (lmf_tls_offset >= 64)
                                lmf_tls_offset = -1;
                        if (thread_tls_offset >= 64)
                                thread_tls_offset = -1;
#else
#if MONO_XEN_OPT
                        /* Runtime Xen detection: presence of /proc/xen means we are a Xen guest */
                        optimize_for_xen = access ("/proc/xen", F_OK) == 0;
#endif
                        tls_offset_inited = TRUE;
                        appdomain_tls_offset = mono_domain_get_tls_offset ();
                        lmf_tls_offset = mono_get_lmf_tls_offset ();
                        thread_tls_offset = mono_thread_get_tls_offset ();
#endif
                }
        }               
}
3775
3776 void
3777 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
3778 {
3779 }
3780
/*
 * mono_arch_emit_this_vret_args:
 *
 *   Add the IR instructions which pass the 'this' argument (THIS_REG, or -1
 * if there is none) and/or the valuetype return address (VT_REG, or -1) to
 * the call INST, according to the calling convention computed by
 * get_call_info ().
 */
void
mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
{
        MonoCallInst *call = (MonoCallInst*)inst;
        CallInfo *cinfo = get_call_info (inst->signature, FALSE);

        /* add the this argument */
        if (this_reg != -1) {
                if (cinfo->args [0].storage == ArgInIReg) {
                        /* 'this' is passed in a register: move it into a fresh vreg */
                        MonoInst *this;
                        MONO_INST_NEW (cfg, this, OP_MOVE);
                        this->type = this_type;
                        this->sreg1 = this_reg;
                        this->dreg = mono_regstate_next_int (cfg->rs);
                        mono_bblock_add_inst (cfg->cbb, this);

                        mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
                }
                else {
                        /* 'this' is passed on the stack */
                        MonoInst *this;
                        MONO_INST_NEW (cfg, this, OP_OUTARG);
                        this->type = this_type;
                        this->sreg1 = this_reg;
                        mono_bblock_add_inst (cfg->cbb, this);
                }
        }

        if (vt_reg != -1) {
                MonoInst *vtarg;

                if (cinfo->ret.storage == ArgValuetypeInReg) {
                        /*
                         * The valuetype is in EAX:EDX after the call, needs to be copied to
                         * the stack. Save the address here, so the call instruction can
                         * access it.
                         */
                        MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
                        vtarg->inst_destbasereg = X86_ESP;
                        vtarg->inst_offset = inst->stack_usage;
                        vtarg->sreg1 = vt_reg;
                        mono_bblock_add_inst (cfg->cbb, vtarg);
                }
                else if (cinfo->ret.storage == ArgInIReg) {
                        /* The return address is passed in a register */
                        MONO_INST_NEW (cfg, vtarg, OP_MOVE);
                        vtarg->sreg1 = vt_reg;
                        vtarg->dreg = mono_regstate_next_int (cfg->rs);
                        mono_bblock_add_inst (cfg->cbb, vtarg);

                        mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
                } else {
                        /* The return address is pushed on the stack */
                        MonoInst *vtarg;
                        MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
                        vtarg->type = STACK_MP;
                        vtarg->sreg1 = vt_reg;
                        mono_bblock_add_inst (cfg->cbb, vtarg);
                }
        }

        g_free (cinfo);
}
3842
/*
 * mono_arch_get_inst_for_method:
 *
 *   Return an arch-specific intrinsic instruction replacing a call to
 * CMETHOD, or NULL if no x86 intrinsic applies.  Covers the System.Math
 * FPU operations, Thread.MemoryBarrier and the 32 bit Interlocked
 * operations.
 */
MonoInst*
mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
{
        MonoInst *ins = NULL;

        if (cmethod->klass == mono_defaults.math_class) {
                /* These map directly onto x87 FPU opcodes */
                if (strcmp (cmethod->name, "Sin") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_SIN);
                        ins->inst_i0 = args [0];
                } else if (strcmp (cmethod->name, "Cos") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_COS);
                        ins->inst_i0 = args [0];
                } else if (strcmp (cmethod->name, "Tan") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_TAN);
                        ins->inst_i0 = args [0];
                } else if (strcmp (cmethod->name, "Atan") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_ATAN);
                        ins->inst_i0 = args [0];
                } else if (strcmp (cmethod->name, "Sqrt") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_SQRT);
                        ins->inst_i0 = args [0];
                } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
                        MONO_INST_NEW (cfg, ins, OP_ABS);
                        ins->inst_i0 = args [0];
                }
#if 0
                /* OP_FREM is not IEEE compatible */
                else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
                        MONO_INST_NEW (cfg, ins, OP_FREM);
                        ins->inst_i0 = args [0];
                        ins->inst_i1 = args [1];
                }
#endif
        } else if (cmethod->klass == mono_defaults.thread_class &&
                           strcmp (cmethod->name, "MemoryBarrier") == 0) {
                MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
        } else if(cmethod->klass->image == mono_defaults.corlib &&
                           (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
                           (strcmp (cmethod->klass->name, "Interlocked") == 0)) {

                /* Increment/Decrement become atomic add of a constant +1/-1 */
                if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
                        MonoInst *ins_iconst;

                        MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
                        MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
                        ins_iconst->inst_c0 = 1;

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = ins_iconst;
                } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
                        MonoInst *ins_iconst;

                        MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
                        MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
                        ins_iconst->inst_c0 = -1;

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = ins_iconst;
                } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
                        MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = args [1];
                } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
                        MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);

                        ins->inst_i0 = args [0];
                        ins->inst_i1 = args [1];
                }
        }

        return ins;
}
3916
3917
3918 gboolean
3919 mono_arch_print_tree (MonoInst *tree, int arity)
3920 {
3921         return 0;
3922 }
3923
3924 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3925 {
3926         MonoInst* ins;
3927         
3928         if (appdomain_tls_offset == -1)
3929                 return NULL;
3930
3931         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3932         ins->inst_offset = appdomain_tls_offset;
3933         return ins;
3934 }
3935
3936 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3937 {
3938         MonoInst* ins;
3939
3940         if (thread_tls_offset == -1)
3941                 return NULL;
3942
3943         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3944         ins->inst_offset = thread_tls_offset;
3945         return ins;
3946 }
3947
3948 guint32
3949 mono_arch_get_patch_offset (guint8 *code)
3950 {
3951         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3952                 return 2;
3953         else if ((code [0] == 0xba))
3954                 return 1;
3955         else if ((code [0] == 0x68))
3956                 /* push IMM */
3957                 return 1;
3958         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3959                 /* push <OFFSET>(<REG>) */
3960                 return 2;
3961         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
3962                 /* call *<OFFSET>(<REG>) */
3963                 return 2;
3964         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
3965                 /* fldl <ADDR> */
3966                 return 2;
3967         else if ((code [0] == 0x58) && (code [1] == 0x05))
3968                 /* pop %eax; add <OFFSET>, %eax */
3969                 return 2;
3970         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
3971                 /* pop <REG>; add <OFFSET>, <REG> */
3972                 return 3;
3973         else {
3974                 g_assert_not_reached ();
3975                 return -1;
3976         }
3977 }
3978
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   Given CODE, the return address of an indirect call, and REGS, the saved
 * register state at the call site, return the address of the memory slot
 * (e.g. vtable slot) the call target was loaded from, or NULL if the call
 * does not match a known indirect-call pattern.
 */
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
        guint8 reg = 0;
        gint32 disp = 0;

        /* go to the start of the call instruction
         *
         * address_byte = (m << 6) | (o << 3) | reg
         * call opcode: 0xff address_byte displacement
         * 0xff m=1,o=2 imm8
         * 0xff m=2,o=2 imm32
         */
        code -= 6;

        /* 
         * A given byte sequence can match more than case here, so we have to be
         * really careful about the ordering of the cases. Longer sequences
         * come first.
         */
        if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
                /*
                 * This is an interface call
                 * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
                 * ff 10                   call   *(%eax)
                 */
                reg = x86_modrm_rm (code [5]);
                disp = 0;
        } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
                /* call *imm8(<REG>)  (mod=1, reg-op=2) */
                reg = code [4] & 0x07;
                disp = (signed char)code [5];
        } else {
                if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
                        /* call *imm32(<REG>)  (mod=2, reg-op=2) */
                        reg = code [1] & 0x07;
                        disp = *((gint32*)(code + 2));
                } else if ((code [1] == 0xe8)) {
                        /* Direct call: no vcall slot */
                        return NULL;
                } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
                        /*
                         * This is a interface call
                         * 8b 40 30   mov    0x30(%eax),%eax
                         * ff 10      call   *(%eax)
                         */
                        disp = 0;
                        reg = code [5] & 0x07;
                }
                else
                        return NULL;
        }

        /* Slot address = saved value of the base register + displacement */
        return (gpointer*)(((gint32)(regs [reg])) + disp);
}
4031
/*
 * mono_arch_get_delegate_method_ptr_addr:
 *
 *   Given CODE, the return address of a delegate invocation, and REGS, the
 * saved register state, return the address the method pointer was loaded
 * from, or NULL if the preceding code does not match the expected delegate
 * invoke sequence (or the base register was EAX, whose saved value has been
 * clobbered by the sequence itself).
 */
gpointer* 
mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
{
        guint8 reg = 0;
        gint32 disp = 0;

        /*
         * Expected 7-byte sequence before the return address:
         *   8b c?      mov %<REG>, %eax   (mod=3, dest=EAX)
         *   8b 40 XX   mov 0xXX(%eax), %eax
         *   ff d0      call *%eax
         */
        code -= 7;
        if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
                reg = x86_modrm_rm (code [1]);
                disp = code [4];

                if (reg == X86_EAX)
                        return NULL;
                else
                        return (gpointer*)(((gint32)(regs [reg])) + disp);
        }

        return NULL;
}