2006-04-04 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #include <unistd.h>
15
16 #include <mono/metadata/appdomain.h>
17 #include <mono/metadata/debug-helpers.h>
18 #include <mono/metadata/threads.h>
19 #include <mono/metadata/profiler-private.h>
20 #include <mono/utils/mono-math.h>
21
22 #include "trace.h"
23 #include "mini-x86.h"
24 #include "inssel.h"
25 #include "cpu-pentium.h"
26
/*
 * TLS slots for the LMF, the current appdomain and the current thread.
 * On windows, these hold the key returned by TlsAlloc ().
 * All three start out as -1 (presumably meaning "not resolved yet" --
 * confirm against the code that assigns them).
 */
static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;
31
/*
 * optimize_for_xen is a real variable only when MONO_XEN_OPT is defined;
 * otherwise it is the compile-time constant 0.
 */
#ifdef MONO_XEN_OPT
/* TRUE by default until we add runtime detection of Xen */
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif
38
/*
 * ALIGN_TO:
 * Round VAL up to the next multiple of ALIGN (ALIGN must be a power of two).
 * The result has type guint64.
 *
 * Both arguments are fully parenthesized and widened to guint64 before use, so
 * that expression arguments such as `a / b` are evaluated as a whole, and so
 * that a 32-bit unsigned ALIGN (e.g. a sizeof on a 32-bit target) does not
 * produce a 32-bit mask that clears the upper half of the result.
 */
#define ALIGN_TO(val,align) ((((guint64)(val)) + ((guint64)(align) - 1)) & ~((guint64)(align) - 1))
40
/*
 * Added to an ArgInfo offset to form the EBP-relative address of an incoming
 * argument (see mono_arch_allocate_vars).
 * NOTE(review): 8 presumably covers the saved EBP plus the return address -- confirm.
 */
#define ARGS_OFFSET 8

/*
 * CALLCONV_IS_STDCALL:
 * Non-zero when SIG uses the stdcall calling convention.
 */
#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

/* Marks code paths that have not been written yet; expands to an assertion failure. */
#define NOT_IMPLEMENTED g_assert_not_reached ()
51
52 const char*
53 mono_arch_regname (int reg) {
54         switch (reg) {
55         case X86_EAX: return "%eax";
56         case X86_EBX: return "%ebx";
57         case X86_ECX: return "%ecx";
58         case X86_EDX: return "%edx";
59         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
60         case X86_EDI: return "%edi";
61         case X86_ESI: return "%esi";
62         }
63         return "unknown";
64 }
65
/*
 * mono_arch_fregname:
 * Return the printable name of the floating point register REG.
 * No names are provided by this backend, so the answer is always "unknown".
 */
const char*
mono_arch_fregname (int reg)
{
	const char *name = "unknown";

	(void) reg;
	return name;
}
70
/*
 * ArgStorage:
 * Where an argument or a return value lives. ArgInIReg is the first
 * enumerator and therefore 0, which is what a zeroed ArgInfo contains.
 */
typedef enum {
	/* In the integer register named by ArgInfo.reg */
	ArgInIReg,
	/* Single precision value in the float register named by ArgInfo.reg */
	ArgInFloatSSEReg,
	/* Double precision value in the float register named by ArgInfo.reg */
	ArgInDoubleSSEReg,
	/* On the stack, at ArgInfo.offset */
	ArgOnStack,
	/* Valuetype described piecewise by ArgInfo.pair_storage/pair_regs */
	ArgValuetypeInReg,
	/* Single precision value on the fp stack (used for R4 return values) */
	ArgOnFloatFpStack,
	/* Double precision value on the fp stack (used for R8 return values) */
	ArgOnDoubleFpStack,
	/* No storage: void return value, or an unused pair_storage slot */
	ArgNone
} ArgStorage;
81
/*
 * ArgInfo:
 * Location of one argument, of the return value or of the signature cookie.
 */
typedef struct {
	/* Running stack size at the time the argument was added */
	gint16 offset;
	/* Register number, meaningful for the register storage kinds */
	gint8  reg;
	/* Which of the locations above/below is in use */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;
91
/*
 * CallInfo:
 * Result of get_call_info (): the location of every argument and of the
 * return value of a call, plus the totals computed while laying them out.
 */
typedef struct {
	/* Not assigned by get_call_info () in this part of the file */
	int nargs;
	/* Total number of stack bytes used by the arguments */
	guint32 stack_usage;
	/* Final values of the integer/float register counters */
	guint32 reg_usage;
	guint32 freg_usage;
	/* Set only on __APPLE__ when the stack size had to be padded to 16 bytes */
	gboolean need_stack_align;
	guint32 stack_align_amount;
	/* Return value location */
	ArgInfo ret;
	/* Location of the vararg signature cookie */
	ArgInfo sig_cookie;
	/*
	 * One entry per argument ('this' first when present). Declared with one
	 * element; get_call_info () allocates room for the actual count after it.
	 */
	ArgInfo args [1];
} CallInfo;
103
/* Number of integer registers used for passing arguments: none. */
#define PARAM_REGS 0

/* Number of float registers used for passing arguments: none. */
#define FLOAT_PARAM_REGS 0

/* Integer argument registers, indexed by add_general (); unused while PARAM_REGS is 0. */
static X86_Reg_No param_regs [] = { 0 };

#ifdef PLATFORM_WIN32
/* Registers that receive a small valuetype returned from a pinvoke call (see add_valuetype ()). */
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
113
114 static void inline
115 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
116 {
117     ainfo->offset = *stack_size;
118
119     if (*gr >= PARAM_REGS) {
120                 ainfo->storage = ArgOnStack;
121                 (*stack_size) += sizeof (gpointer);
122     }
123     else {
124                 ainfo->storage = ArgInIReg;
125                 ainfo->reg = param_regs [*gr];
126                 (*gr) ++;
127     }
128 }
129
130 static void inline
131 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
132 {
133         ainfo->offset = *stack_size;
134
135         g_assert (PARAM_REGS == 0);
136         
137         ainfo->storage = ArgOnStack;
138         (*stack_size) += sizeof (gpointer) * 2;
139 }
140
141 static void inline
142 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
143 {
144     ainfo->offset = *stack_size;
145
146     if (*gr >= FLOAT_PARAM_REGS) {
147                 ainfo->storage = ArgOnStack;
148                 (*stack_size) += is_double ? 8 : 4;
149     }
150     else {
151                 /* A double register */
152                 if (is_double)
153                         ainfo->storage = ArgInDoubleSSEReg;
154                 else
155                         ainfo->storage = ArgInFloatSSEReg;
156                 ainfo->reg = *gr;
157                 (*gr) += 1;
158     }
159 }
160
161
162 static void
163 add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
164                gboolean is_return,
165                guint32 *gr, guint32 *fr, guint32 *stack_size)
166 {
167         guint32 size;
168         MonoClass *klass;
169
170         klass = mono_class_from_mono_type (type);
171         if (sig->pinvoke) 
172                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
173         else 
174                 size = mono_type_stack_size (&klass->byval_arg, NULL);
175
176 #ifdef PLATFORM_WIN32
177         if (sig->pinvoke && is_return) {
178                 MonoMarshalType *info;
179
180                 /*
181                  * the exact rules are not very well documented, the code below seems to work with the 
182                  * code generated by gcc 3.3.3 -mno-cygwin.
183                  */
184                 info = mono_marshal_load_type_info (klass);
185                 g_assert (info);
186
187                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
188
189                 /* Special case structs with only a float member */
190                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
191                         ainfo->storage = ArgValuetypeInReg;
192                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
193                         return;
194                 }
195                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
196                         ainfo->storage = ArgValuetypeInReg;
197                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
198                         return;
199                 }               
200                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
201                         ainfo->storage = ArgValuetypeInReg;
202                         ainfo->pair_storage [0] = ArgInIReg;
203                         ainfo->pair_regs [0] = return_regs [0];
204                         if (info->native_size > 4) {
205                                 ainfo->pair_storage [1] = ArgInIReg;
206                                 ainfo->pair_regs [1] = return_regs [1];
207                         }
208                         return;
209                 }
210         }
211 #endif
212
213         ainfo->offset = *stack_size;
214         ainfo->storage = ArgOnStack;
215         *stack_size += ALIGN_TO (size, sizeof (gpointer));
216 }
217
218 /*
219  * get_call_info:
220  *
221  *  Obtain information about a call according to the calling convention.
222  * For x86 ELF, see the "System V Application Binary Interface Intel386 
223  * Architecture Processor Supplment, Fourth Edition" document for more
224  * information.
225  * For x86 win32, see ???.
226  */
227 static CallInfo*
228 get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
229 {
230         guint32 i, gr, fr;
231         MonoType *ret_type;
232         int n = sig->hasthis + sig->param_count;
233         guint32 stack_size = 0;
234         CallInfo *cinfo;
235
236         cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
237
238         gr = 0;
239         fr = 0;
240
241         /* return value */
242         {
243                 ret_type = mono_type_get_underlying_type (sig->ret);
244                 switch (ret_type->type) {
245                 case MONO_TYPE_BOOLEAN:
246                 case MONO_TYPE_I1:
247                 case MONO_TYPE_U1:
248                 case MONO_TYPE_I2:
249                 case MONO_TYPE_U2:
250                 case MONO_TYPE_CHAR:
251                 case MONO_TYPE_I4:
252                 case MONO_TYPE_U4:
253                 case MONO_TYPE_I:
254                 case MONO_TYPE_U:
255                 case MONO_TYPE_PTR:
256                 case MONO_TYPE_FNPTR:
257                 case MONO_TYPE_CLASS:
258                 case MONO_TYPE_OBJECT:
259                 case MONO_TYPE_SZARRAY:
260                 case MONO_TYPE_ARRAY:
261                 case MONO_TYPE_STRING:
262                         cinfo->ret.storage = ArgInIReg;
263                         cinfo->ret.reg = X86_EAX;
264                         break;
265                 case MONO_TYPE_U8:
266                 case MONO_TYPE_I8:
267                         cinfo->ret.storage = ArgInIReg;
268                         cinfo->ret.reg = X86_EAX;
269                         break;
270                 case MONO_TYPE_R4:
271                         cinfo->ret.storage = ArgOnFloatFpStack;
272                         break;
273                 case MONO_TYPE_R8:
274                         cinfo->ret.storage = ArgOnDoubleFpStack;
275                         break;
276                 case MONO_TYPE_GENERICINST:
277                         if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
278                                 cinfo->ret.storage = ArgInIReg;
279                                 cinfo->ret.reg = X86_EAX;
280                                 break;
281                         }
282                         /* Fall through */
283                 case MONO_TYPE_VALUETYPE: {
284                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
285
286                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
287                         if (cinfo->ret.storage == ArgOnStack)
288                                 /* The caller passes the address where the value is stored */
289                                 add_general (&gr, &stack_size, &cinfo->ret);
290                         break;
291                 }
292                 case MONO_TYPE_TYPEDBYREF:
293                         /* Same as a valuetype with size 24 */
294                         add_general (&gr, &stack_size, &cinfo->ret);
295                         ;
296                         break;
297                 case MONO_TYPE_VOID:
298                         cinfo->ret.storage = ArgNone;
299                         break;
300                 default:
301                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
302                 }
303         }
304
305         /* this */
306         if (sig->hasthis)
307                 add_general (&gr, &stack_size, cinfo->args + 0);
308
309         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
310                 gr = PARAM_REGS;
311                 fr = FLOAT_PARAM_REGS;
312                 
313                 /* Emit the signature cookie just before the implicit arguments */
314                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
315         }
316
317         for (i = 0; i < sig->param_count; ++i) {
318                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
319                 MonoType *ptype;
320
321                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
322                         /* We allways pass the sig cookie on the stack for simplicity */
323                         /* 
324                          * Prevent implicit arguments + the sig cookie from being passed 
325                          * in registers.
326                          */
327                         gr = PARAM_REGS;
328                         fr = FLOAT_PARAM_REGS;
329
330                         /* Emit the signature cookie just before the implicit arguments */
331                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
332                 }
333
334                 if (sig->params [i]->byref) {
335                         add_general (&gr, &stack_size, ainfo);
336                         continue;
337                 }
338                 ptype = mono_type_get_underlying_type (sig->params [i]);
339                 switch (ptype->type) {
340                 case MONO_TYPE_BOOLEAN:
341                 case MONO_TYPE_I1:
342                 case MONO_TYPE_U1:
343                         add_general (&gr, &stack_size, ainfo);
344                         break;
345                 case MONO_TYPE_I2:
346                 case MONO_TYPE_U2:
347                 case MONO_TYPE_CHAR:
348                         add_general (&gr, &stack_size, ainfo);
349                         break;
350                 case MONO_TYPE_I4:
351                 case MONO_TYPE_U4:
352                         add_general (&gr, &stack_size, ainfo);
353                         break;
354                 case MONO_TYPE_I:
355                 case MONO_TYPE_U:
356                 case MONO_TYPE_PTR:
357                 case MONO_TYPE_FNPTR:
358                 case MONO_TYPE_CLASS:
359                 case MONO_TYPE_OBJECT:
360                 case MONO_TYPE_STRING:
361                 case MONO_TYPE_SZARRAY:
362                 case MONO_TYPE_ARRAY:
363                         add_general (&gr, &stack_size, ainfo);
364                         break;
365                 case MONO_TYPE_GENERICINST:
366                         if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
367                                 add_general (&gr, &stack_size, ainfo);
368                                 break;
369                         }
370                         /* Fall through */
371                 case MONO_TYPE_VALUETYPE:
372                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
373                         break;
374                 case MONO_TYPE_TYPEDBYREF:
375                         stack_size += sizeof (MonoTypedRef);
376                         ainfo->storage = ArgOnStack;
377                         break;
378                 case MONO_TYPE_U8:
379                 case MONO_TYPE_I8:
380                         add_general_pair (&gr, &stack_size, ainfo);
381                         break;
382                 case MONO_TYPE_R4:
383                         add_float (&fr, &stack_size, ainfo, FALSE);
384                         break;
385                 case MONO_TYPE_R8:
386                         add_float (&fr, &stack_size, ainfo, TRUE);
387                         break;
388                 default:
389                         g_error ("unexpected type 0x%x", ptype->type);
390                         g_assert_not_reached ();
391                 }
392         }
393
394         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
395                 gr = PARAM_REGS;
396                 fr = FLOAT_PARAM_REGS;
397                 
398                 /* Emit the signature cookie just before the implicit arguments */
399                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
400         }
401
402 #if defined(__APPLE__)
403         if ((stack_size % 16) != 0) { 
404                 cinfo->need_stack_align = TRUE;
405                 stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
406         }
407 #endif
408
409         cinfo->stack_usage = stack_size;
410         cinfo->reg_usage = gr;
411         cinfo->freg_usage = fr;
412         return cinfo;
413 }
414
415 /*
416  * mono_arch_get_argument_info:
417  * @csig:  a method signature
418  * @param_count: the number of parameters to consider
419  * @arg_info: an array to store the result infos
420  *
421  * Gathers information on parameters such as size, alignment and
422  * padding. arg_info should be large enought to hold param_count + 1 entries. 
423  *
424  * Returns the size of the activation frame.
425  */
426 int
427 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
428 {
429         int k, frame_size = 0;
430         int size, align, pad;
431         int offset = 8;
432         CallInfo *cinfo;
433
434         cinfo = get_call_info (csig, FALSE);
435
436         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
437                 frame_size += sizeof (gpointer);
438                 offset += 4;
439         }
440
441         arg_info [0].offset = offset;
442
443         if (csig->hasthis) {
444                 frame_size += sizeof (gpointer);
445                 offset += 4;
446         }
447
448         arg_info [0].size = frame_size;
449
450         for (k = 0; k < param_count; k++) {
451                 
452                 if (csig->pinvoke)
453                         size = mono_type_native_stack_size (csig->params [k], &align);
454                 else
455                         size = mono_type_stack_size (csig->params [k], &align);
456
457                 /* ignore alignment for now */
458                 align = 1;
459
460                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
461                 arg_info [k].pad = pad;
462                 frame_size += size;
463                 arg_info [k + 1].pad = 0;
464                 arg_info [k + 1].size = size;
465                 offset += pad;
466                 arg_info [k + 1].offset = offset;
467                 offset += size;
468         }
469
470         align = MONO_ARCH_FRAME_ALIGNMENT;
471         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
472         arg_info [k].pad = pad;
473
474         g_free (cinfo);
475
476         return frame_size;
477 }
478
/*
 * Hand assembled routine with the CpuidFunc signature: it executes the
 * cpuid instruction with its first argument in %eax and stores the resulting
 * %eax, %ebx, %ecx and %edx through the four pointer arguments.
 * It is kept as data and copied into executable memory by cpuid ().
 */
static const guchar cpuid_impl [] = {
	0x55,                           /* push   %ebp */
	0x89, 0xe5,                     /* mov    %esp,%ebp */
	0x53,                           /* push   %ebx */
	0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                     /* cpuid   */
	0x50,                           /* push   %eax */
	0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
	0x89, 0x18,                     /* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
	0x89, 0x08,                     /* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
	0x89, 0x10,                     /* mov    %edx,(%eax) */
	0x58,                           /* pop    %eax */
	0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
	0x89, 0x02,                     /* mov    %eax,(%edx) */
	0x5b,                           /* pop    %ebx */
	0xc9,                           /* leave   */
	0xc3,                           /* ret     */
};

/* Type used to call the copy of cpuid_impl */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
501
502 static int 
503 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
504 {
505         int have_cpuid = 0;
506 #ifndef _MSC_VER
507         __asm__  __volatile__ (
508                 "pushfl\n"
509                 "popl %%eax\n"
510                 "movl %%eax, %%edx\n"
511                 "xorl $0x200000, %%eax\n"
512                 "pushl %%eax\n"
513                 "popfl\n"
514                 "pushfl\n"
515                 "popl %%eax\n"
516                 "xorl %%edx, %%eax\n"
517                 "andl $0x200000, %%eax\n"
518                 "movl %%eax, %0"
519                 : "=r" (have_cpuid)
520                 :
521                 : "%eax", "%edx"
522         );
523 #else
524         __asm {
525                 pushfd
526                 pop eax
527                 mov edx, eax
528                 xor eax, 0x200000
529                 push eax
530                 popfd
531                 pushfd
532                 pop eax
533                 xor eax, edx
534                 and eax, 0x200000
535                 mov have_cpuid, eax
536         }
537 #endif
538         if (have_cpuid) {
539                 /* Have to use the code manager to get around WinXP DEP */
540                 MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
541                 CpuidFunc func;
542                 void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
543                 memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
544
545                 func = (CpuidFunc)ptr;
546                 func (id, p_eax, p_ebx, p_ecx, p_edx);
547
548                 mono_code_manager_destroy (codeman);
549
550                 /*
551                  * We use this approach because of issues with gcc and pic code, see:
552                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
553                 __asm__ __volatile__ ("cpuid"
554                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
555                         : "a" (id));
556                 */
557                 return 1;
558         }
559         return 0;
560 }
561
562 /*
563  * Initialize the cpu to execute managed code.
564  */
565 void
566 mono_arch_cpu_init (void)
567 {
568         /* spec compliance requires running with double precision */
569 #ifndef _MSC_VER
570         guint16 fpcw;
571
572         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
573         fpcw &= ~X86_FPCW_PRECC_MASK;
574         fpcw |= X86_FPCW_PREC_DOUBLE;
575         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
576         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
577 #else
578         _control87 (_PC_53, MCW_PC);
579 #endif
580 }
581
582 /*
583  * This function returns the optimizations supported on this cpu.
584  */
585 guint32
586 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
587 {
588         int eax, ebx, ecx, edx;
589         guint32 opts = 0;
590         
591         *exclude_mask = 0;
592         /* Feature Flags function, flags returned in EDX. */
593         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
594                 if (edx & (1 << 15)) {
595                         opts |= MONO_OPT_CMOV;
596                         if (edx & 1)
597                                 opts |= MONO_OPT_FCMOV;
598                         else
599                                 *exclude_mask |= MONO_OPT_FCMOV;
600                 } else
601                         *exclude_mask |= MONO_OPT_CMOV;
602         }
603         return opts;
604 }
605
606 /*
607  * Determine whenever the trap whose info is in SIGINFO is caused by
608  * integer overflow.
609  */
610 gboolean
611 mono_arch_is_int_overflow (void *sigctx, void *info)
612 {
613         MonoContext ctx;
614         guint8* ip;
615
616         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
617
618         ip = (guint8*)ctx.eip;
619
620         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
621                 gint32 reg;
622
623                 /* idiv REG */
624                 switch (x86_modrm_rm (ip [1])) {
625                 case X86_EAX:
626                         reg = ctx.eax;
627                         break;
628                 case X86_ECX:
629                         reg = ctx.ecx;
630                         break;
631                 case X86_EDX:
632                         reg = ctx.edx;
633                         break;
634                 case X86_EBX:
635                         reg = ctx.ebx;
636                         break;
637                 case X86_ESI:
638                         reg = ctx.esi;
639                         break;
640                 case X86_EDI:
641                         reg = ctx.edi;
642                         break;
643                 default:
644                         g_assert_not_reached ();
645                         reg = -1;
646                 }
647
648                 if (reg == -1)
649                         return TRUE;
650         }
651                         
652         return FALSE;
653 }
654
655 static gboolean
656 is_regsize_var (MonoType *t) {
657         if (t->byref)
658                 return TRUE;
659         switch (mono_type_get_underlying_type (t)->type) {
660         case MONO_TYPE_I4:
661         case MONO_TYPE_U4:
662         case MONO_TYPE_I:
663         case MONO_TYPE_U:
664         case MONO_TYPE_PTR:
665         case MONO_TYPE_FNPTR:
666                 return TRUE;
667         case MONO_TYPE_OBJECT:
668         case MONO_TYPE_STRING:
669         case MONO_TYPE_CLASS:
670         case MONO_TYPE_SZARRAY:
671         case MONO_TYPE_ARRAY:
672                 return TRUE;
673         case MONO_TYPE_GENERICINST:
674                 if (!mono_type_generic_inst_is_valuetype (t))
675                         return TRUE;
676                 return FALSE;
677         case MONO_TYPE_VALUETYPE:
678                 return FALSE;
679         }
680         return FALSE;
681 }
682
683 GList *
684 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
685 {
686         GList *vars = NULL;
687         int i;
688
689         for (i = 0; i < cfg->num_varinfo; i++) {
690                 MonoInst *ins = cfg->varinfo [i];
691                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
692
693                 /* unused vars */
694                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
695                         continue;
696
697                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
698                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
699                         continue;
700
701                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
702                  * 8bit quantities in caller saved registers on x86 */
703                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
704                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
705                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
706                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
707                         g_assert (i == vmv->idx);
708                         vars = g_list_prepend (vars, vmv);
709                 }
710         }
711
712         vars = mono_varlist_sort (cfg, vars, 0);
713
714         return vars;
715 }
716
717 GList *
718 mono_arch_get_global_int_regs (MonoCompile *cfg)
719 {
720         GList *regs = NULL;
721
722         /* we can use 3 registers for global allocation */
723         regs = g_list_prepend (regs, (gpointer)X86_EBX);
724         regs = g_list_prepend (regs, (gpointer)X86_ESI);
725         regs = g_list_prepend (regs, (gpointer)X86_EDI);
726
727         return regs;
728 }
729
730 /*
731  * mono_arch_regalloc_cost:
732  *
733  *  Return the cost, in number of memory references, of the action of 
734  * allocating the variable VMV into a register during global register
735  * allocation.
736  */
737 guint32
738 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
739 {
740         MonoInst *ins = cfg->varinfo [vmv->idx];
741
742         if (cfg->method->save_lmf)
743                 /* The register is already saved */
744                 return (ins->opcode == OP_ARG) ? 1 : 0;
745         else
746                 /* push+pop+possible load if it is an argument */
747                 return (ins->opcode == OP_ARG) ? 3 : 2;
748 }
749  
750 /*
751  * Set var information according to the calling convention. X86 version.
752  * The locals var stuff should most likely be split in another method.
753  */
754 void
755 mono_arch_allocate_vars (MonoCompile *cfg)
756 {
757         MonoMethodSignature *sig;
758         MonoMethodHeader *header;
759         MonoInst *inst;
760         guint32 locals_stack_size, locals_stack_align;
761         int i, offset;
762         gint32 *offsets;
763         CallInfo *cinfo;
764
765         header = mono_method_get_header (cfg->method);
766         sig = mono_method_signature (cfg->method);
767
768         cinfo = get_call_info (sig, FALSE);
769
770         cfg->frame_reg = MONO_ARCH_BASEREG;
771         offset = 0;
772
773         /* Reserve space to save LMF and caller saved registers */
774
775         if (cfg->method->save_lmf) {
776                 offset += sizeof (MonoLMF);
777         } else {
778                 if (cfg->used_int_regs & (1 << X86_EBX)) {
779                         offset += 4;
780                 }
781
782                 if (cfg->used_int_regs & (1 << X86_EDI)) {
783                         offset += 4;
784                 }
785
786                 if (cfg->used_int_regs & (1 << X86_ESI)) {
787                         offset += 4;
788                 }
789         }
790
791         switch (cinfo->ret.storage) {
792         case ArgValuetypeInReg:
793                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
794                 offset += 8;
795                 cfg->ret->opcode = OP_REGOFFSET;
796                 cfg->ret->inst_basereg = X86_EBP;
797                 cfg->ret->inst_offset = - offset;
798                 break;
799         default:
800                 break;
801         }
802
803         /* Allocate locals */
804         offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
805         if (locals_stack_align) {
806                 offset += (locals_stack_align - 1);
807                 offset &= ~(locals_stack_align - 1);
808         }
809         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
810                 if (offsets [i] != -1) {
811                         MonoInst *inst = cfg->varinfo [i];
812                         inst->opcode = OP_REGOFFSET;
813                         inst->inst_basereg = X86_EBP;
814                         inst->inst_offset = - (offset + offsets [i]);
815                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
816                 }
817         }
818         g_free (offsets);
819         offset += locals_stack_size;
820
821
822         /*
823          * Allocate arguments+return value
824          */
825
826         switch (cinfo->ret.storage) {
827         case ArgOnStack:
828                 cfg->ret->opcode = OP_REGOFFSET;
829                 cfg->ret->inst_basereg = X86_EBP;
830                 cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
831                 break;
832         case ArgValuetypeInReg:
833                 break;
834         case ArgInIReg:
835                 cfg->ret->opcode = OP_REGVAR;
836                 cfg->ret->inst_c0 = cinfo->ret.reg;
837                 break;
838         case ArgNone:
839         case ArgOnFloatFpStack:
840         case ArgOnDoubleFpStack:
841                 break;
842         default:
843                 g_assert_not_reached ();
844         }
845
846         if (sig->call_convention == MONO_CALL_VARARG) {
847                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
848                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
849         }
850
851         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
852                 ArgInfo *ainfo = &cinfo->args [i];
853                 inst = cfg->varinfo [i];
854                 if (inst->opcode != OP_REGVAR) {
855                         inst->opcode = OP_REGOFFSET;
856                         inst->inst_basereg = X86_EBP;
857                 }
858                 inst->inst_offset = ainfo->offset + ARGS_OFFSET;
859         }
860
861         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
862         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
863
864         cfg->stack_offset = offset;
865
866         g_free (cinfo);
867 }
868
869 void
870 mono_arch_create_vars (MonoCompile *cfg)
871 {
872         MonoMethodSignature *sig;
873         CallInfo *cinfo;
874
875         sig = mono_method_signature (cfg->method);
876
877         cinfo = get_call_info (sig, FALSE);
878
879         if (cinfo->ret.storage == ArgValuetypeInReg)
880                 cfg->ret_var_is_local = TRUE;
881
882         g_free (cinfo);
883 }
884
/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently the alignment in mono_arch_call_opcode is computed without arch_get_argument_info.
 */
888
889 /* 
890  * take the arguments and generate the arch-specific
891  * instructions to properly call the function in call.
892  * This includes pushing, moving arguments to the right register
893  * etc.
894  */
895 MonoCallInst*
896 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
897         MonoInst *arg, *in;
898         MonoMethodSignature *sig;
899         int i, n;
900         CallInfo *cinfo;
901         int sentinelpos;
902
903         sig = call->signature;
904         n = sig->param_count + sig->hasthis;
905
906         cinfo = get_call_info (sig, FALSE);
907
908         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
909                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
910
911         for (i = 0; i < n; ++i) {
912                 ArgInfo *ainfo = cinfo->args + i;
913
914                 /* Emit the signature cookie just before the implicit arguments */
915                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
916                         MonoMethodSignature *tmp_sig;
917                         MonoInst *sig_arg;
918
919                         /* FIXME: Add support for signature tokens to AOT */
920                         cfg->disable_aot = TRUE;
921                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
922
923                         /*
924                          * mono_ArgIterator_Setup assumes the signature cookie is 
925                          * passed first and all the arguments which were before it are
926                          * passed on the stack after the signature. So compensate by 
927                          * passing a different signature.
928                          */
929                         tmp_sig = mono_metadata_signature_dup (call->signature);
930                         tmp_sig->param_count -= call->signature->sentinelpos;
931                         tmp_sig->sentinelpos = 0;
932                         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
933
934                         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
935                         sig_arg->inst_p0 = tmp_sig;
936
937                         arg->inst_left = sig_arg;
938                         arg->type = STACK_PTR;
939                         /* prepend, so they get reversed */
940                         arg->next = call->out_args;
941                         call->out_args = arg;
942                 }
943
944                 if (is_virtual && i == 0) {
945                         /* the argument will be attached to the call instrucion */
946                         in = call->args [i];
947                 } else {
948                         MonoType *t;
949
950                         if (i >= sig->hasthis)
951                                 t = sig->params [i - sig->hasthis];
952                         else
953                                 t = &mono_defaults.int_class->byval_arg;
954                         t = mono_type_get_underlying_type (t);
955
956                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
957                         in = call->args [i];
958                         arg->cil_code = in->cil_code;
959                         arg->inst_left = in;
960                         arg->type = in->type;
961                         /* prepend, so they get reversed */
962                         arg->next = call->out_args;
963                         call->out_args = arg;
964
965                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
966                                 gint align;
967                                 guint32 size;
968
969                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
970                                         size = sizeof (MonoTypedRef);
971                                         align = sizeof (gpointer);
972                                 }
973                                 else
974                                         if (sig->pinvoke)
975                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
976                                         else
977                                                 size = mono_type_stack_size (&in->klass->byval_arg, &align);
978                                 arg->opcode = OP_OUTARG_VT;
979                                 arg->klass = in->klass;
980                                 arg->unused = sig->pinvoke;
981                                 arg->inst_imm = size; 
982                         }
983                         else {
984                                 switch (ainfo->storage) {
985                                 case ArgOnStack:
986                                         arg->opcode = OP_OUTARG;
987                                         if (!t->byref) {
988                                                 if (t->type == MONO_TYPE_R4)
989                                                         arg->opcode = OP_OUTARG_R4;
990                                                 else
991                                                         if (t->type == MONO_TYPE_R8)
992                                                                 arg->opcode = OP_OUTARG_R8;
993                                         }
994                                         break;
995                                 default:
996                                         g_assert_not_reached ();
997                                 }
998                         }
999                 }
1000         }
1001
1002         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1003                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1004                         MonoInst *zero_inst;
1005                         /*
1006                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1007                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1008                          * before calling the function. So we add a dummy instruction to represent pushing the 
1009                          * struct return address to the stack. The return address will be saved to this stack slot 
1010                          * by the code emitted in this_vret_args.
1011                          */
1012                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1013                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1014                         zero_inst->inst_p0 = 0;
1015                         arg->inst_left = zero_inst;
1016                         arg->type = STACK_PTR;
1017                         /* prepend, so they get reversed */
1018                         arg->next = call->out_args;
1019                         call->out_args = arg;
1020                 }
1021                 else
1022                         /* if the function returns a struct, the called method already does a ret $0x4 */
1023                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1024                                 cinfo->stack_usage -= 4;
1025         }
1026         
1027         call->stack_usage = cinfo->stack_usage;
1028
1029 #if defined(__APPLE__)
1030         if (cinfo->need_stack_align) {
1031                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1032                 arg->inst_c0 = cinfo->stack_align_amount;
1033                 arg->next = call->out_args;
1034                 call->out_args = arg;
1035         }
1036 #endif 
1037
1038         g_free (cinfo);
1039
1040         return call;
1041 }
1042
1043 /*
1044  * Allow tracing to work with this interface (with an optional argument)
1045  */
1046 void*
1047 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1048 {
1049         guchar *code = p;
1050
1051         /* if some args are passed in registers, we need to save them here */
1052         x86_push_reg (code, X86_EBP);
1053
1054         if (cfg->compile_aot) {
1055                 x86_push_imm (code, cfg->method);
1056                 x86_mov_reg_imm (code, X86_EAX, func);
1057                 x86_call_reg (code, X86_EAX);
1058         } else {
1059                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1060                 x86_push_imm (code, cfg->method);
1061                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1062                 x86_call_code (code, 0);
1063         }
1064         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1065
1066         return code;
1067 }
1068
/*
 * How mono_arch_instrument_epilog preserves the method's return value
 * around the call to the instrumentation function.
 */
enum {
	SAVE_NONE    = 0,	/* nothing is saved or restored */
	SAVE_STRUCT  = 1,	/* valuetype return: no register is saved */
	SAVE_EAX     = 2,	/* EAX is pushed before and popped after the call */
	SAVE_EAX_EDX = 3,	/* the EDX:EAX pair is pushed and popped */
	SAVE_FP      = 4	/* the FP stack top is spilled to the stack and reloaded */
};
1076
1077 void*
1078 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1079 {
1080         guchar *code = p;
1081         int arg_size = 0, save_mode = SAVE_NONE;
1082         MonoMethod *method = cfg->method;
1083         
1084         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1085         case MONO_TYPE_VOID:
1086                 /* special case string .ctor icall */
1087                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1088                         save_mode = SAVE_EAX;
1089                 else
1090                         save_mode = SAVE_NONE;
1091                 break;
1092         case MONO_TYPE_I8:
1093         case MONO_TYPE_U8:
1094                 save_mode = SAVE_EAX_EDX;
1095                 break;
1096         case MONO_TYPE_R4:
1097         case MONO_TYPE_R8:
1098                 save_mode = SAVE_FP;
1099                 break;
1100         case MONO_TYPE_VALUETYPE:
1101                 save_mode = SAVE_STRUCT;
1102                 break;
1103         default:
1104                 save_mode = SAVE_EAX;
1105                 break;
1106         }
1107
1108         switch (save_mode) {
1109         case SAVE_EAX_EDX:
1110                 x86_push_reg (code, X86_EDX);
1111                 x86_push_reg (code, X86_EAX);
1112                 if (enable_arguments) {
1113                         x86_push_reg (code, X86_EDX);
1114                         x86_push_reg (code, X86_EAX);
1115                         arg_size = 8;
1116                 }
1117                 break;
1118         case SAVE_EAX:
1119                 x86_push_reg (code, X86_EAX);
1120                 if (enable_arguments) {
1121                         x86_push_reg (code, X86_EAX);
1122                         arg_size = 4;
1123                 }
1124                 break;
1125         case SAVE_FP:
1126                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1127                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1128                 if (enable_arguments) {
1129                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1130                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1131                         arg_size = 8;
1132                 }
1133                 break;
1134         case SAVE_STRUCT:
1135                 if (enable_arguments) {
1136                         x86_push_membase (code, X86_EBP, 8);
1137                         arg_size = 4;
1138                 }
1139                 break;
1140         case SAVE_NONE:
1141         default:
1142                 break;
1143         }
1144
1145         if (cfg->compile_aot) {
1146                 x86_push_imm (code, method);
1147                 x86_mov_reg_imm (code, X86_EAX, func);
1148                 x86_call_reg (code, X86_EAX);
1149         } else {
1150                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1151                 x86_push_imm (code, method);
1152                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1153                 x86_call_code (code, 0);
1154         }
1155         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1156
1157         switch (save_mode) {
1158         case SAVE_EAX_EDX:
1159                 x86_pop_reg (code, X86_EAX);
1160                 x86_pop_reg (code, X86_EDX);
1161                 break;
1162         case SAVE_EAX:
1163                 x86_pop_reg (code, X86_EAX);
1164                 break;
1165         case SAVE_FP:
1166                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1167                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1168                 break;
1169         case SAVE_NONE:
1170         default:
1171                 break;
1172         }
1173
1174         return code;
1175 }
1176
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch for INS with condition COND and signedness SIGN.
 * The target is the label in ins->inst_i0 when MONO_INST_BRLABEL is set,
 * otherwise the basic block ins->inst_true_bb. When the target offset is
 * already non-zero the branch is emitted directly; otherwise a patch entry
 * is recorded and an 8 bit branch is emitted if MONO_OPT_BRANCH is enabled
 * and the displacement computed from cpos fits in an imm8, a 32 bit branch
 * if not.
 *
 * Uses `code', `cfg' and `cpos' from the expanding scope. The expansion is
 * a bare if/else statement, so it must not be used as the unbraced body of
 * another if. INS may be evaluated several times.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if ((ins)->flags & MONO_INST_BRLABEL) { \
	if ((ins)->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, (ins)->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 ((ins)->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if ((ins)->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, (ins)->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 ((ins)->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}
1201
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit a conditional branch that raises the exception EXC_NAME when COND
 * holds. If mono_branch_optimize_exception_target () returns a target
 * instruction, branch to it directly; otherwise record a
 * MONO_PATCH_INFO_EXC patch and emit a 32 bit branch.
 *
 * Uses `cfg', `bb' and `code' from the expanding scope (plus whatever
 * EMIT_COND_BRANCH needs). The expansion already ends in a semicolon.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,is_signed,exc_name) \
	do { \
		MonoInst *exc_target = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (exc_target != NULL) { \
			EMIT_COND_BRANCH (exc_target, cond, is_signed); \
		} else { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, is_signed); \
		} \
	} while (0); 
1217
/*
 * EMIT_FPCOMPARE:
 *
 *   Emit x86_fcompp followed by x86_fnstsw at CODE (on x86: compare the two
 * top FP stack entries, popping both, then store the FPU status word).
 * The expansion already ends in a semicolon.
 */
#define EMIT_FPCOMPARE(code) \
	do { \
		x86_fcompp (code); \
		x86_fnstsw (code); \
	} while (0); 
1222
1223
1224 static guint8*
1225 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1226 {
1227         if (cfg->compile_aot) {
1228                 guint32 got_reg = X86_EAX;
1229
1230                 if (cfg->compile_aot) {          
1231                         /*
1232                          * Since the patches are generated by the back end, there is
1233                          * no way to generate a got_var at this point.
1234                          */
1235                         g_assert (cfg->got_var);
1236
1237                         if (cfg->got_var->opcode == OP_REGOFFSET)
1238                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1239                         else
1240                                 got_reg = cfg->got_var->dreg;
1241                 }
1242
1243                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1244                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1245         }
1246         else {
1247                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1248                 x86_call_code (code, 0);
1249         }
1250
1251         return code;
1252 }
1253
/*
 * True if INS is one of the opcodes listed below. peephole_pass uses this to
 * decide whether the instruction following a zero constant tolerates the
 * flags being changed. INS is evaluated up to three times.
 *
 * FIXME: Add more instructions
 */
#define INST_IGNORES_CFLAGS(ins) \
	(((ins)->opcode == CEE_BR) || \
	 ((ins)->opcode == OP_STORE_MEMBASE_IMM) || \
	 ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1256
1257 static void
1258 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1259 {
1260         MonoInst *ins, *last_ins = NULL;
1261         ins = bb->code;
1262
1263         while (ins) {
1264
1265                 switch (ins->opcode) {
1266                 case OP_ICONST:
1267                         /* reg = 0 -> XOR (reg, reg) */
1268                         /* XOR sets cflags on x86, so we cant do it always */
1269                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1270                                 ins->opcode = CEE_XOR;
1271                                 ins->sreg1 = ins->dreg;
1272                                 ins->sreg2 = ins->dreg;
1273                         }
1274                         break;
1275                 case OP_MUL_IMM: 
1276                         /* remove unnecessary multiplication with 1 */
1277                         if (ins->inst_imm == 1) {
1278                                 if (ins->dreg != ins->sreg1) {
1279                                         ins->opcode = OP_MOVE;
1280                                 } else {
1281                                         last_ins->next = ins->next;
1282                                         ins = ins->next;
1283                                         continue;
1284                                 }
1285                         }
1286                         break;
1287                 case OP_COMPARE_IMM:
1288                         /* OP_COMPARE_IMM (reg, 0) 
1289                          * --> 
1290                          * OP_X86_TEST_NULL (reg) 
1291                          */
1292                         if (!ins->inst_imm)
1293                                 ins->opcode = OP_X86_TEST_NULL;
1294                         break;
1295                 case OP_X86_COMPARE_MEMBASE_IMM:
1296                         /* 
1297                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1298                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1299                          * -->
1300                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1301                          * OP_COMPARE_IMM reg, imm
1302                          *
1303                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1304                          */
1305                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1306                             ins->inst_basereg == last_ins->inst_destbasereg &&
1307                             ins->inst_offset == last_ins->inst_offset) {
1308                                         ins->opcode = OP_COMPARE_IMM;
1309                                         ins->sreg1 = last_ins->sreg1;
1310
1311                                         /* check if we can remove cmp reg,0 with test null */
1312                                         if (!ins->inst_imm)
1313                                                 ins->opcode = OP_X86_TEST_NULL;
1314                                 }
1315
1316                         break;
1317                 case OP_LOAD_MEMBASE:
1318                 case OP_LOADI4_MEMBASE:
1319                         /* 
1320                          * Note: if reg1 = reg2 the load op is removed
1321                          *
1322                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1323                          * OP_LOAD_MEMBASE offset(basereg), reg2
1324                          * -->
1325                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1326                          * OP_MOVE reg1, reg2
1327                          */
1328                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1329                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1330                             ins->inst_basereg == last_ins->inst_destbasereg &&
1331                             ins->inst_offset == last_ins->inst_offset) {
1332                                 if (ins->dreg == last_ins->sreg1) {
1333                                         last_ins->next = ins->next;                             
1334                                         ins = ins->next;                                
1335                                         continue;
1336                                 } else {
1337                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1338                                         ins->opcode = OP_MOVE;
1339                                         ins->sreg1 = last_ins->sreg1;
1340                                 }
1341
1342                         /* 
1343                          * Note: reg1 must be different from the basereg in the second load
1344                          * Note: if reg1 = reg2 is equal then second load is removed
1345                          *
1346                          * OP_LOAD_MEMBASE offset(basereg), reg1
1347                          * OP_LOAD_MEMBASE offset(basereg), reg2
1348                          * -->
1349                          * OP_LOAD_MEMBASE offset(basereg), reg1
1350                          * OP_MOVE reg1, reg2
1351                          */
1352                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1353                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1354                               ins->inst_basereg != last_ins->dreg &&
1355                               ins->inst_basereg == last_ins->inst_basereg &&
1356                               ins->inst_offset == last_ins->inst_offset) {
1357
1358                                 if (ins->dreg == last_ins->dreg) {
1359                                         last_ins->next = ins->next;                             
1360                                         ins = ins->next;                                
1361                                         continue;
1362                                 } else {
1363                                         ins->opcode = OP_MOVE;
1364                                         ins->sreg1 = last_ins->dreg;
1365                                 }
1366
1367                                 //g_assert_not_reached ();
1368
1369 #if 0
1370                         /* 
1371                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1372                          * OP_LOAD_MEMBASE offset(basereg), reg
1373                          * -->
1374                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1375                          * OP_ICONST reg, imm
1376                          */
1377                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1378                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1379                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1380                                    ins->inst_offset == last_ins->inst_offset) {
1381                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1382                                 ins->opcode = OP_ICONST;
1383                                 ins->inst_c0 = last_ins->inst_imm;
1384                                 g_assert_not_reached (); // check this rule
1385 #endif
1386                         }
1387                         break;
1388                 case OP_LOADU1_MEMBASE:
1389                 case OP_LOADI1_MEMBASE:
1390                         /* 
1391                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1392                          * OP_LOAD_MEMBASE offset(basereg), reg2
1393                          * -->
1394                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1395                          * CONV_I2/U2 reg1, reg2
1396                          */
1397                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1398                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1399                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1400                                         ins->inst_offset == last_ins->inst_offset) {
1401                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1402                                 ins->sreg1 = last_ins->sreg1;
1403                         }
1404                         break;
1405                 case OP_LOADU2_MEMBASE:
1406                 case OP_LOADI2_MEMBASE:
1407                         /* 
1408                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1409                          * OP_LOAD_MEMBASE offset(basereg), reg2
1410                          * -->
1411                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1412                          * CONV_I2/U2 reg1, reg2
1413                          */
1414                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1415                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1416                                         ins->inst_offset == last_ins->inst_offset) {
1417                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1418                                 ins->sreg1 = last_ins->sreg1;
1419                         }
1420                         break;
1421                 case CEE_CONV_I4:
1422                 case CEE_CONV_U4:
1423                 case OP_MOVE:
1424                         /*
1425                          * Removes:
1426                          *
1427                          * OP_MOVE reg, reg 
1428                          */
1429                         if (ins->dreg == ins->sreg1) {
1430                                 if (last_ins)
1431                                         last_ins->next = ins->next;                             
1432                                 ins = ins->next;
1433                                 continue;
1434                         }
1435                         /* 
1436                          * Removes:
1437                          *
1438                          * OP_MOVE sreg, dreg 
1439                          * OP_MOVE dreg, sreg
1440                          */
1441                         if (last_ins && last_ins->opcode == OP_MOVE &&
1442                             ins->sreg1 == last_ins->dreg &&
1443                             ins->dreg == last_ins->sreg1) {
1444                                 last_ins->next = ins->next;                             
1445                                 ins = ins->next;                                
1446                                 continue;
1447                         }
1448                         break;
1449                         
1450                 case OP_X86_PUSH_MEMBASE:
1451                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1452                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1453                             ins->inst_basereg == last_ins->inst_destbasereg &&
1454                             ins->inst_offset == last_ins->inst_offset) {
1455                                     ins->opcode = OP_X86_PUSH;
1456                                     ins->sreg1 = last_ins->sreg1;
1457                         }
1458                         break;
1459                 }
1460                 last_ins = ins;
1461                 ins = ins->next;
1462         }
1463         bb->last_ins = last_ins;
1464 }
1465
1466 static const int 
1467 branch_cc_table [] = {
1468         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1469         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1470         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1471 };
1472
1473 static const char*const * ins_spec = pentium_desc;
1474
1475 /*#include "cprop.c"*/
/*
 * mono_arch_local_regalloc:
 *
 *   Arch entry point for local register allocation of BB. This
 * implementation only forwards to mono_local_regalloc ().
 */
void
mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
{
	mono_local_regalloc (cfg, bb);
}
1481
1482 static unsigned char*
1483 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1484 {
1485         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1486         x86_fnstcw_membase(code, X86_ESP, 0);
1487         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1488         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1489         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1490         x86_fldcw_membase (code, X86_ESP, 2);
1491         if (size == 8) {
1492                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1493                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1494                 x86_pop_reg (code, dreg);
1495                 /* FIXME: need the high register 
1496                  * x86_pop_reg (code, dreg_high);
1497                  */
1498         } else {
1499                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1500                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1501                 x86_pop_reg (code, dreg);
1502         }
1503         x86_fldcw_membase (code, X86_ESP, 0);
1504         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1505
1506         if (size == 1)
1507                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1508         else if (size == 2)
1509                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1510         return code;
1511 }
1512
1513 static unsigned char*
1514 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1515 {
1516         int sreg = tree->sreg1;
1517         int need_touch = FALSE;
1518
1519 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1520         need_touch = TRUE;
1521 #endif
1522
1523         if (need_touch) {
1524                 guint8* br[5];
1525
1526                 /*
1527                  * Under Windows:
1528                  * If requested stack size is larger than one page,
1529                  * perform stack-touch operation
1530                  */
1531                 /*
1532                  * Generate stack probe code.
1533                  * Under Windows, it is necessary to allocate one page at a time,
1534                  * "touching" stack after each successful sub-allocation. This is
1535                  * because of the way stack growth is implemented - there is a
1536                  * guard page before the lowest stack page that is currently commited.
1537                  * Stack normally grows sequentially so OS traps access to the
1538                  * guard page and commits more pages when needed.
1539                  */
1540                 x86_test_reg_imm (code, sreg, ~0xFFF);
1541                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1542
1543                 br[2] = code; /* loop */
1544                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1545                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1546
1547                 /* 
1548                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
1549                  * that follows only initializes the last part of the area.
1550                  */
1551                 /* Same as the init code below with size==0x1000 */
1552                 if (tree->flags & MONO_INST_INIT) {
1553                         x86_push_reg (code, X86_EAX);
1554                         x86_push_reg (code, X86_ECX);
1555                         x86_push_reg (code, X86_EDI);
1556                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
1557                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
1558                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
1559                         x86_cld (code);
1560                         x86_prefix (code, X86_REP_PREFIX);
1561                         x86_stosl (code);
1562                         x86_pop_reg (code, X86_EDI);
1563                         x86_pop_reg (code, X86_ECX);
1564                         x86_pop_reg (code, X86_EAX);
1565                 }
1566
1567                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1568                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1569                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1570                 x86_patch (br[3], br[2]);
1571                 x86_test_reg_reg (code, sreg, sreg);
1572                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1573                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1574
1575                 br[1] = code; x86_jump8 (code, 0);
1576
1577                 x86_patch (br[0], code);
1578                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1579                 x86_patch (br[1], code);
1580                 x86_patch (br[4], code);
1581         }
1582         else
1583                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1584
1585         if (tree->flags & MONO_INST_INIT) {
1586                 int offset = 0;
1587                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1588                         x86_push_reg (code, X86_EAX);
1589                         offset += 4;
1590                 }
1591                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1592                         x86_push_reg (code, X86_ECX);
1593                         offset += 4;
1594                 }
1595                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1596                         x86_push_reg (code, X86_EDI);
1597                         offset += 4;
1598                 }
1599                 
1600                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1601                 if (sreg != X86_ECX)
1602                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1603                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1604                                 
1605                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1606                 x86_cld (code);
1607                 x86_prefix (code, X86_REP_PREFIX);
1608                 x86_stosl (code);
1609                 
1610                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1611                         x86_pop_reg (code, X86_EDI);
1612                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1613                         x86_pop_reg (code, X86_ECX);
1614                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1615                         x86_pop_reg (code, X86_EAX);
1616         }
1617         return code;
1618 }
1619
1620
1621 static guint8*
1622 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1623 {
1624         CallInfo *cinfo;
1625         int quad;
1626
1627         /* Move return value to the target register */
1628         switch (ins->opcode) {
1629         case CEE_CALL:
1630         case OP_CALL_REG:
1631         case OP_CALL_MEMBASE:
1632                 if (ins->dreg != X86_EAX)
1633                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1634                 break;
1635         case OP_VCALL:
1636         case OP_VCALL_REG:
1637         case OP_VCALL_MEMBASE:
1638                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
1639                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1640                         /* Pop the destination address from the stack */
1641                         x86_pop_reg (code, X86_ECX);
1642                         
1643                         for (quad = 0; quad < 2; quad ++) {
1644                                 switch (cinfo->ret.pair_storage [quad]) {
1645                                 case ArgInIReg:
1646                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1647                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1648                                         break;
1649                                 case ArgNone:
1650                                         break;
1651                                 default:
1652                                         g_assert_not_reached ();
1653                                 }
1654                         }
1655                 }
1656                 g_free (cinfo);
1657         default:
1658                 break;
1659         }
1660
1661         return code;
1662 }
1663
1664 /*
1665  * emit_tls_get:
1666  * @code: buffer to store code to
1667  * @dreg: hard register where to place the result
1668  * @tls_offset: offset info
1669  *
1670  * emit_tls_get emits in @code the native code that puts in the dreg register
1671  * the item in the thread local storage identified by tls_offset.
1672  *
1673  * Returns: a pointer to the end of the stored code
1674  */
1675 static guint8*
1676 emit_tls_get (guint8* code, int dreg, int tls_offset)
1677 {
1678 #ifdef PLATFORM_WIN32
1679         /* 
1680          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1681          * Journal and/or a disassembly of the TlsGet () function.
1682          */
1683         g_assert (tls_offset < 64);
1684         x86_prefix (code, X86_FS_PREFIX);
1685         x86_mov_reg_mem (code, dreg, 0x18, 4);
1686         /* Dunno what this does but TlsGetValue () contains it */
1687         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
1688         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
1689 #else
1690         if (optimize_for_xen) {
1691                 x86_prefix (code, X86_GS_PREFIX);
1692                 x86_mov_reg_mem (code, dreg, 0, 4);
1693                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
1694         } else {
1695                 x86_prefix (code, X86_GS_PREFIX);
1696                 x86_mov_reg_mem (code, dreg, tls_offset, 4);
1697         }
1698 #endif
1699         return code;
1700 }
1701
/*
 * REAL_PRINT_REG:
 * @text: string literal; it is pasted in front of the " %d %p\n" format
 *        suffix, so a non-literal expression will not compile
 * @reg: register number, asserted to be >= 0
 *
 * Debugging aid. Expands to statements which emit, through the `code`
 * pointer that must be in scope at the point of use, a call to printf
 * printing @text, the register number and the register's contents.
 * The emitted code pushes EAX, EDX and ECX before the call and pops them
 * afterwards; the 3*4 stack adjustment discards the three printf arguments.
 *
 * The expansion is a plain sequence of statements that already ends with a
 * semicolon and is not wrapped in do { } while (0): do not use it as the
 * unbraced body of an if, else or loop.
 */
1702 #define REAL_PRINT_REG(text,reg) \
1703 mono_assert (reg >= 0); \
1704 x86_push_reg (code, X86_EAX); \
1705 x86_push_reg (code, X86_EDX); \
1706 x86_push_reg (code, X86_ECX); \
1707 x86_push_reg (code, reg); \
1708 x86_push_imm (code, reg); \
1709 x86_push_imm (code, text " %d %p\n"); \
1710 x86_mov_reg_imm (code, X86_EAX, printf); \
1711 x86_call_reg (code, X86_EAX); \
1712 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
1713 x86_pop_reg (code, X86_ECX); \
1714 x86_pop_reg (code, X86_EDX); \
1715 x86_pop_reg (code, X86_EAX);
1716
1717 /*
 * Alignment, in bytes, to which mono_arch_output_basic_block pads the native
 * code before a loop start block when MONO_OPT_LOOP is enabled. The mask
 * arithmetic used there (align - 1) requires a power of two.
 * The value is fixed for now: benchmark and set based on cpu.
 */
1718 #define LOOP_ALIGNMENT 8
/* Non-zero when the basic block has both loop_body_start and nesting set */
1719 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
1720
1721 void
1722 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1723 {
1724         MonoInst *ins;
1725         MonoCallInst *call;
1726         guint offset;
1727         guint8 *code = cfg->native_code + cfg->code_len;
1728         MonoInst *last_ins = NULL;
1729         guint last_offset = 0;
1730         int max_len, cpos;
1731
1732         if (cfg->opt & MONO_OPT_PEEPHOLE)
1733                 peephole_pass (cfg, bb);
1734
1735         if (cfg->opt & MONO_OPT_LOOP) {
1736                 int pad, align = LOOP_ALIGNMENT;
1737                 /* set alignment depending on cpu */
1738                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
1739                         pad = align - pad;
1740                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
1741                         x86_padding (code, pad);
1742                         cfg->code_len += pad;
1743                         bb->native_offset = cfg->code_len;
1744                 }
1745         }
1746
1747         if (cfg->verbose_level > 2)
1748                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1749
1750         cpos = bb->max_offset;
1751
1752         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1753                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1754                 g_assert (!cfg->compile_aot);
1755                 cpos += 6;
1756
1757                 cov->data [bb->dfn].cil_code = bb->cil_code;
1758                 /* this is not thread save, but good enough */
1759                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1760         }
1761
1762         offset = code - cfg->native_code;
1763
1764         ins = bb->code;
1765         while (ins) {
1766                 offset = code - cfg->native_code;
1767
1768                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
1769
1770                 if (offset > (cfg->code_size - max_len - 16)) {
1771                         cfg->code_size *= 2;
1772                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1773                         code = cfg->native_code + offset;
1774                         mono_jit_stats.code_reallocs++;
1775                 }
1776
1777                 mono_debug_record_line_number (cfg, ins, offset);
1778
1779                 switch (ins->opcode) {
1780                 case OP_BIGMUL:
1781                         x86_mul_reg (code, ins->sreg2, TRUE);
1782                         break;
1783                 case OP_BIGMUL_UN:
1784                         x86_mul_reg (code, ins->sreg2, FALSE);
1785                         break;
1786                 case OP_X86_SETEQ_MEMBASE:
1787                 case OP_X86_SETNE_MEMBASE:
1788                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
1789                                          ins->inst_basereg, ins->inst_offset, TRUE);
1790                         break;
1791                 case OP_STOREI1_MEMBASE_IMM:
1792                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1793                         break;
1794                 case OP_STOREI2_MEMBASE_IMM:
1795                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1796                         break;
1797                 case OP_STORE_MEMBASE_IMM:
1798                 case OP_STOREI4_MEMBASE_IMM:
1799                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1800                         break;
1801                 case OP_STOREI1_MEMBASE_REG:
1802                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1803                         break;
1804                 case OP_STOREI2_MEMBASE_REG:
1805                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1806                         break;
1807                 case OP_STORE_MEMBASE_REG:
1808                 case OP_STOREI4_MEMBASE_REG:
1809                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1810                         break;
1811                 case CEE_LDIND_I:
1812                 case CEE_LDIND_I4:
1813                 case CEE_LDIND_U4:
1814                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1815                         break;
1816                 case OP_LOADU4_MEM:
1817                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1818                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1819                         break;
1820                 case OP_LOAD_MEMBASE:
1821                 case OP_LOADI4_MEMBASE:
1822                 case OP_LOADU4_MEMBASE:
1823                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1824                         break;
1825                 case OP_LOADU1_MEMBASE:
1826                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1827                         break;
1828                 case OP_LOADI1_MEMBASE:
1829                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1830                         break;
1831                 case OP_LOADU2_MEMBASE:
1832                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1833                         break;
1834                 case OP_LOADI2_MEMBASE:
1835                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1836                         break;
1837                 case CEE_CONV_I1:
1838                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1839                         break;
1840                 case CEE_CONV_I2:
1841                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1842                         break;
1843                 case CEE_CONV_U1:
1844                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1845                         break;
1846                 case CEE_CONV_U2:
1847                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1848                         break;
1849                 case OP_COMPARE:
1850                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1851                         break;
1852                 case OP_COMPARE_IMM:
1853                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1854                         break;
1855                 case OP_X86_COMPARE_MEMBASE_REG:
1856                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1857                         break;
1858                 case OP_X86_COMPARE_MEMBASE_IMM:
1859                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1860                         break;
1861                 case OP_X86_COMPARE_MEMBASE8_IMM:
1862                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1863                         break;
1864                 case OP_X86_COMPARE_REG_MEMBASE:
1865                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
1866                         break;
1867                 case OP_X86_COMPARE_MEM_IMM:
1868                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
1869                         break;
1870                 case OP_X86_TEST_NULL:
1871                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
1872                         break;
1873                 case OP_X86_ADD_MEMBASE_IMM:
1874                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1875                         break;
1876                 case OP_X86_ADD_MEMBASE:
1877                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
1878                         break;
1879                 case OP_X86_SUB_MEMBASE_IMM:
1880                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1881                         break;
1882                 case OP_X86_SUB_MEMBASE:
1883                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
1884                         break;
1885                 case OP_X86_AND_MEMBASE_IMM:
1886                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1887                         break;
1888                 case OP_X86_OR_MEMBASE_IMM:
1889                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1890                         break;
1891                 case OP_X86_XOR_MEMBASE_IMM:
1892                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1893                         break;
1894                 case OP_X86_INC_MEMBASE:
1895                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
1896                         break;
1897                 case OP_X86_INC_REG:
1898                         x86_inc_reg (code, ins->dreg);
1899                         break;
1900                 case OP_X86_DEC_MEMBASE:
1901                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
1902                         break;
1903                 case OP_X86_DEC_REG:
1904                         x86_dec_reg (code, ins->dreg);
1905                         break;
1906                 case OP_X86_MUL_MEMBASE:
1907                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
1908                         break;
1909                 case CEE_BREAK:
1910                         x86_breakpoint (code);
1911                         break;
1912                 case OP_ADDCC:
1913                 case CEE_ADD:
1914                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
1915                         break;
1916                 case OP_ADC:
1917                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
1918                         break;
1919                 case OP_ADDCC_IMM:
1920                 case OP_ADD_IMM:
1921                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
1922                         break;
1923                 case OP_ADC_IMM:
1924                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
1925                         break;
1926                 case OP_SUBCC:
1927                 case CEE_SUB:
1928                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
1929                         break;
1930                 case OP_SBB:
1931                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
1932                         break;
1933                 case OP_SUBCC_IMM:
1934                 case OP_SUB_IMM:
1935                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
1936                         break;
1937                 case OP_SBB_IMM:
1938                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
1939                         break;
1940                 case CEE_AND:
1941                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
1942                         break;
1943                 case OP_AND_IMM:
1944                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
1945                         break;
1946                 case CEE_DIV:
1947                         x86_cdq (code);
1948                         x86_div_reg (code, ins->sreg2, TRUE);
1949                         break;
1950                 case CEE_DIV_UN:
1951                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1952                         x86_div_reg (code, ins->sreg2, FALSE);
1953                         break;
1954                 case OP_DIV_IMM:
1955                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1956                         x86_cdq (code);
1957                         x86_div_reg (code, ins->sreg2, TRUE);
1958                         break;
1959                 case CEE_REM:
1960                         x86_cdq (code);
1961                         x86_div_reg (code, ins->sreg2, TRUE);
1962                         break;
1963                 case CEE_REM_UN:
1964                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1965                         x86_div_reg (code, ins->sreg2, FALSE);
1966                         break;
1967                 case OP_REM_IMM:
1968                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1969                         x86_cdq (code);
1970                         x86_div_reg (code, ins->sreg2, TRUE);
1971                         break;
1972                 case CEE_OR:
1973                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
1974                         break;
1975                 case OP_OR_IMM:
1976                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
1977                         break;
1978                 case CEE_XOR:
1979                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
1980                         break;
1981                 case OP_XOR_IMM:
1982                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
1983                         break;
1984                 case CEE_SHL:
1985                         g_assert (ins->sreg2 == X86_ECX);
1986                         x86_shift_reg (code, X86_SHL, ins->dreg);
1987                         break;
1988                 case CEE_SHR:
1989                         g_assert (ins->sreg2 == X86_ECX);
1990                         x86_shift_reg (code, X86_SAR, ins->dreg);
1991                         break;
1992                 case OP_SHR_IMM:
1993                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
1994                         break;
1995                 case OP_SHR_UN_IMM:
1996                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
1997                         break;
1998                 case CEE_SHR_UN:
1999                         g_assert (ins->sreg2 == X86_ECX);
2000                         x86_shift_reg (code, X86_SHR, ins->dreg);
2001                         break;
2002                 case OP_SHL_IMM:
2003                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2004                         break;
2005                 case OP_LSHL: {
2006                         guint8 *jump_to_end;
2007
2008                         /* handle shifts below 32 bits */
2009                         x86_shld_reg (code, ins->unused, ins->sreg1);
2010                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2011
2012                         x86_test_reg_imm (code, X86_ECX, 32);
2013                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2014
2015                         /* handle shift over 32 bit */
2016                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2017                         x86_clear_reg (code, ins->sreg1);
2018                         
2019                         x86_patch (jump_to_end, code);
2020                         }
2021                         break;
2022                 case OP_LSHR: {
2023                         guint8 *jump_to_end;
2024
2025                         /* handle shifts below 32 bits */
2026                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2027                         x86_shift_reg (code, X86_SAR, ins->unused);
2028
2029                         x86_test_reg_imm (code, X86_ECX, 32);
2030                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2031
2032                         /* handle shifts over 31 bits */
2033                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2034                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
2035                         
2036                         x86_patch (jump_to_end, code);
2037                         }
2038                         break;
2039                 case OP_LSHR_UN: {
2040                         guint8 *jump_to_end;
2041
2042                         /* handle shifts below 32 bits */
2043                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2044                         x86_shift_reg (code, X86_SHR, ins->unused);
2045
2046                         x86_test_reg_imm (code, X86_ECX, 32);
2047                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2048
2049                         /* handle shifts over 31 bits */
2050                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2051                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
2052                         
2053                         x86_patch (jump_to_end, code);
2054                         }
2055                         break;
2056                 case OP_LSHL_IMM:
2057                         if (ins->inst_imm >= 32) {
2058                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2059                                 x86_clear_reg (code, ins->sreg1);
2060                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
2061                         } else {
2062                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
2063                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2064                         }
2065                         break;
2066                 case OP_LSHR_IMM:
2067                         if (ins->inst_imm >= 32) {
2068                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
2069                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
2070                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2071                         } else {
2072                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2073                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
2074                         }
2075                         break;
2076                 case OP_LSHR_UN_IMM:
2077                         if (ins->inst_imm >= 32) {
2078                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2079                                 x86_clear_reg (code, ins->unused);
2080                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2081                         } else {
2082                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2083                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
2084                         }
2085                         break;
2086                 case CEE_NOT:
2087                         x86_not_reg (code, ins->sreg1);
2088                         break;
2089                 case CEE_NEG:
2090                         x86_neg_reg (code, ins->sreg1);
2091                         break;
2092                 case OP_SEXT_I1:
2093                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2094                         break;
2095                 case OP_SEXT_I2:
2096                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2097                         break;
2098                 case CEE_MUL:
2099                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2100                         break;
2101                 case OP_MUL_IMM:
2102                         switch (ins->inst_imm) {
2103                         case 2:
2104                                 /* MOV r1, r2 */
2105                                 /* ADD r1, r1 */
2106                                 if (ins->dreg != ins->sreg1)
2107                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2108                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2109                                 break;
2110                         case 3:
2111                                 /* LEA r1, [r2 + r2*2] */
2112                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2113                                 break;
2114                         case 5:
2115                                 /* LEA r1, [r2 + r2*4] */
2116                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2117                                 break;
2118                         case 6:
2119                                 /* LEA r1, [r2 + r2*2] */
2120                                 /* ADD r1, r1          */
2121                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2122                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2123                                 break;
2124                         case 9:
2125                                 /* LEA r1, [r2 + r2*8] */
2126                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2127                                 break;
2128                         case 10:
2129                                 /* LEA r1, [r2 + r2*4] */
2130                                 /* ADD r1, r1          */
2131                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2132                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2133                                 break;
2134                         case 12:
2135                                 /* LEA r1, [r2 + r2*2] */
2136                                 /* SHL r1, 2           */
2137                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2138                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2139                                 break;
2140                         case 25:
2141                                 /* LEA r1, [r2 + r2*4] */
2142                                 /* LEA r1, [r1 + r1*4] */
2143                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2144                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2145                                 break;
2146                         case 100:
2147                                 /* LEA r1, [r2 + r2*4] */
2148                                 /* SHL r1, 2           */
2149                                 /* LEA r1, [r1 + r1*4] */
2150                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2151                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2152                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2153                                 break;
2154                         default:
2155                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2156                                 break;
2157                         }
2158                         break;
2159                 case CEE_MUL_OVF:
2160                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2161                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2162                         break;
2163                 case CEE_MUL_OVF_UN: {
2164                         /* the mul operation and the exception check should most likely be split */
2165                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2166                         /*g_assert (ins->sreg2 == X86_EAX);
2167                         g_assert (ins->dreg == X86_EAX);*/
2168                         if (ins->sreg2 == X86_EAX) {
2169                                 non_eax_reg = ins->sreg1;
2170                         } else if (ins->sreg1 == X86_EAX) {
2171                                 non_eax_reg = ins->sreg2;
2172                         } else {
2173                                 /* no need to save since we're going to store to it anyway */
2174                                 if (ins->dreg != X86_EAX) {
2175                                         saved_eax = TRUE;
2176                                         x86_push_reg (code, X86_EAX);
2177                                 }
2178                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2179                                 non_eax_reg = ins->sreg2;
2180                         }
2181                         if (ins->dreg == X86_EDX) {
2182                                 if (!saved_eax) {
2183                                         saved_eax = TRUE;
2184                                         x86_push_reg (code, X86_EAX);
2185                                 }
2186                         } else if (ins->dreg != X86_EAX) {
2187                                 saved_edx = TRUE;
2188                                 x86_push_reg (code, X86_EDX);
2189                         }
2190                         x86_mul_reg (code, non_eax_reg, FALSE);
2191                         /* save before the check since pop and mov don't change the flags */
2192                         if (ins->dreg != X86_EAX)
2193                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2194                         if (saved_edx)
2195                                 x86_pop_reg (code, X86_EDX);
2196                         if (saved_eax)
2197                                 x86_pop_reg (code, X86_EAX);
2198                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2199                         break;
2200                 }
2201                 case OP_ICONST:
2202                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2203                         break;
2204                 case OP_AOTCONST:
2205                         g_assert_not_reached ();
2206                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2207                         x86_mov_reg_imm (code, ins->dreg, 0);
2208                         break;
2209                 case OP_LOAD_GOTADDR:
2210                         x86_call_imm (code, 0);
2211                         /* 
2212                          * The patch needs to point to the pop, since the GOT offset needs 
2213                          * to be added to that address.
2214                          */
2215                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2216                         x86_pop_reg (code, ins->dreg);
2217                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2218                         break;
2219                 case OP_GOT_ENTRY:
2220                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2221                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2222                         break;
2223                 case OP_X86_PUSH_GOT_ENTRY:
2224                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2225                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2226                         break;
2227                 case CEE_CONV_I4:
2228                 case OP_MOVE:
2229                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2230                         break;
2231                 case CEE_CONV_U4:
2232                         g_assert_not_reached ();
2233                 case CEE_JMP: {
2234                         /*
2235                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2236                          * Keep in sync with the code in emit_epilog.
2237                          */
2238                         int pos = 0;
2239
2240                         /* FIXME: no tracing support... */
2241                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2242                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2243                         /* reset offset to make max_len work */
2244                         offset = code - cfg->native_code;
2245
2246                         g_assert (!cfg->method->save_lmf);
2247
2248                         if (cfg->used_int_regs & (1 << X86_EBX))
2249                                 pos -= 4;
2250                         if (cfg->used_int_regs & (1 << X86_EDI))
2251                                 pos -= 4;
2252                         if (cfg->used_int_regs & (1 << X86_ESI))
2253                                 pos -= 4;
2254                         if (pos)
2255                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2256         
2257                         if (cfg->used_int_regs & (1 << X86_ESI))
2258                                 x86_pop_reg (code, X86_ESI);
2259                         if (cfg->used_int_regs & (1 << X86_EDI))
2260                                 x86_pop_reg (code, X86_EDI);
2261                         if (cfg->used_int_regs & (1 << X86_EBX))
2262                                 x86_pop_reg (code, X86_EBX);
2263         
2264                         /* restore ESP/EBP */
2265                         x86_leave (code);
2266                         offset = code - cfg->native_code;
2267                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2268                         x86_jump32 (code, 0);
2269                         break;
2270                 }
2271                 case OP_CHECK_THIS:
2272                         /* ensure ins->sreg1 is not NULL
2273                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2274                          * cmp DWORD PTR [eax], 0
2275                          */
2276                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2277                         break;
2278                 case OP_ARGLIST: {
2279                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2280                         x86_push_reg (code, hreg);
2281                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2282                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2283                         x86_pop_reg (code, hreg);
2284                         break;
2285                 }
2286                 case OP_FCALL:
2287                 case OP_LCALL:
2288                 case OP_VCALL:
2289                 case OP_VOIDCALL:
2290                 case CEE_CALL:
2291                         call = (MonoCallInst*)ins;
2292                         if (ins->flags & MONO_INST_HAS_METHOD)
2293                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2294                         else
2295                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2296                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2297                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2298                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2299                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2300                                  * smart enough to do that optimization yet
2301                                  *
2302                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2303                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2304                                  * speedup (most likely from locality benefits). People with other processors should
2305                                  * check on theirs to see what happens.
2306                                  */
2307                                 if (call->stack_usage == 4) {
2308                                         /* we want to use registers that won't get used soon, so use
2309                                          * ecx, as eax will get allocated first. edx is used by long calls,
2310                                          * so we can't use that.
2311                                          */
2312                                         
2313                                         x86_pop_reg (code, X86_ECX);
2314                                 } else {
2315                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2316                                 }
2317                         }
2318                         code = emit_move_return_value (cfg, ins, code);
2319                         break;
2320                 case OP_FCALL_REG:
2321                 case OP_LCALL_REG:
2322                 case OP_VCALL_REG:
2323                 case OP_VOIDCALL_REG:
2324                 case OP_CALL_REG:
2325                         call = (MonoCallInst*)ins;
2326                         x86_call_reg (code, ins->sreg1);
2327                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2328                                 if (call->stack_usage == 4)
2329                                         x86_pop_reg (code, X86_ECX);
2330                                 else
2331                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2332                         }
2333                         code = emit_move_return_value (cfg, ins, code);
2334                         break;
2335                 case OP_FCALL_MEMBASE:
2336                 case OP_LCALL_MEMBASE:
2337                 case OP_VCALL_MEMBASE:
2338                 case OP_VOIDCALL_MEMBASE:
2339                 case OP_CALL_MEMBASE:
2340                         call = (MonoCallInst*)ins;
2341                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2342                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2343                                 if (call->stack_usage == 4)
2344                                         x86_pop_reg (code, X86_ECX);
2345                                 else
2346                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2347                         }
2348                         code = emit_move_return_value (cfg, ins, code);
2349                         break;
2350                 case OP_OUTARG:
2351                 case OP_X86_PUSH:
2352                         x86_push_reg (code, ins->sreg1);
2353                         break;
2354                 case OP_X86_PUSH_IMM:
2355                         x86_push_imm (code, ins->inst_imm);
2356                         break;
2357                 case OP_X86_PUSH_MEMBASE:
2358                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2359                         break;
2360                 case OP_X86_PUSH_OBJ: 
2361                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2362                         x86_push_reg (code, X86_EDI);
2363                         x86_push_reg (code, X86_ESI);
2364                         x86_push_reg (code, X86_ECX);
2365                         if (ins->inst_offset)
2366                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2367                         else
2368                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2369                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2370                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2371                         x86_cld (code);
2372                         x86_prefix (code, X86_REP_PREFIX);
2373                         x86_movsd (code);
2374                         x86_pop_reg (code, X86_ECX);
2375                         x86_pop_reg (code, X86_ESI);
2376                         x86_pop_reg (code, X86_EDI);
2377                         break;
2378                 case OP_X86_LEA:
2379                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2380                         break;
2381                 case OP_X86_LEA_MEMBASE:
2382                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2383                         break;
2384                 case OP_X86_XCHG:
2385                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2386                         break;
2387                 case OP_LOCALLOC:
2388                         /* keep alignment */
2389                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2390                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2391                         code = mono_emit_stack_alloc (code, ins);
2392                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2393                         break;
2394                 case CEE_RET:
2395                         x86_ret (code);
2396                         break;
2397                 case CEE_THROW: {
2398                         x86_push_reg (code, ins->sreg1);
2399                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2400                                                           (gpointer)"mono_arch_throw_exception");
2401                         break;
2402                 }
2403                 case OP_RETHROW: {
2404                         x86_push_reg (code, ins->sreg1);
2405                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2406                                                           (gpointer)"mono_arch_rethrow_exception");
2407                         break;
2408                 }
2409                 case OP_CALL_HANDLER: 
2410                         /* Align stack */
2411 #ifdef __APPLE__
2412                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2413 #endif
2414                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2415                         x86_call_imm (code, 0);
2416 #ifdef __APPLE__
2417                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2418 #endif
2419                         break;
2420                 case OP_LABEL:
2421                         ins->inst_c0 = code - cfg->native_code;
2422                         break;
2423                 case CEE_BR:
2424                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2425                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2426                         //break;
2427                         if (ins->flags & MONO_INST_BRLABEL) {
2428                                 if (ins->inst_i0->inst_c0) {
2429                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2430                                 } else {
2431                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2432                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2433                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2434                                                 x86_jump8 (code, 0);
2435                                         else 
2436                                                 x86_jump32 (code, 0);
2437                                 }
2438                         } else {
2439                                 if (ins->inst_target_bb->native_offset) {
2440                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2441                                 } else {
2442                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2443                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2444                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2445                                                 x86_jump8 (code, 0);
2446                                         else 
2447                                                 x86_jump32 (code, 0);
2448                                 } 
2449                         }
2450                         break;
2451                 case OP_BR_REG:
2452                         x86_jump_reg (code, ins->sreg1);
2453                         break;
2454                 case OP_CEQ:
2455                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2456                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2457                         break;
2458                 case OP_CLT:
2459                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2460                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2461                         break;
2462                 case OP_CLT_UN:
2463                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2464                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2465                         break;
2466                 case OP_CGT:
2467                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2468                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2469                         break;
2470                 case OP_CGT_UN:
2471                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2472                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2473                         break;
2474                 case OP_CNE:
2475                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2476                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2477                         break;
2478                 case OP_COND_EXC_EQ:
2479                 case OP_COND_EXC_NE_UN:
2480                 case OP_COND_EXC_LT:
2481                 case OP_COND_EXC_LT_UN:
2482                 case OP_COND_EXC_GT:
2483                 case OP_COND_EXC_GT_UN:
2484                 case OP_COND_EXC_GE:
2485                 case OP_COND_EXC_GE_UN:
2486                 case OP_COND_EXC_LE:
2487                 case OP_COND_EXC_LE_UN:
2488                 case OP_COND_EXC_OV:
2489                 case OP_COND_EXC_NO:
2490                 case OP_COND_EXC_C:
2491                 case OP_COND_EXC_NC:
2492                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2493                         break;
2494                 case CEE_BEQ:
2495                 case CEE_BNE_UN:
2496                 case CEE_BLT:
2497                 case CEE_BLT_UN:
2498                 case CEE_BGT:
2499                 case CEE_BGT_UN:
2500                 case CEE_BGE:
2501                 case CEE_BGE_UN:
2502                 case CEE_BLE:
2503                 case CEE_BLE_UN:
2504                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2505                         break;
2506
2507                 /* floating point opcodes */
2508                 case OP_R8CONST: {
2509                         double d = *(double *)ins->inst_p0;
2510
2511                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2512                                 x86_fldz (code);
2513                         } else if (d == 1.0) {
2514                                 x86_fld1 (code);
2515                         } else {
2516                                 if (cfg->compile_aot) {
2517                                         guint32 *val = (guint32*)&d;
2518                                         x86_push_imm (code, val [1]);
2519                                         x86_push_imm (code, val [0]);
2520                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2521                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2522                                 }
2523                                 else {
2524                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2525                                         x86_fld (code, NULL, TRUE);
2526                                 }
2527                         }
2528                         break;
2529                 }
2530                 case OP_R4CONST: {
2531                         float f = *(float *)ins->inst_p0;
2532
2533                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2534                                 x86_fldz (code);
2535                         } else if (f == 1.0) {
2536                                 x86_fld1 (code);
2537                         } else {
2538                                 if (cfg->compile_aot) {
2539                                         guint32 val = *(guint32*)&f;
2540                                         x86_push_imm (code, val);
2541                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2542                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2543                                 }
2544                                 else {
2545                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2546                                         x86_fld (code, NULL, FALSE);
2547                                 }
2548                         }
2549                         break;
2550                 }
2551                 case OP_STORER8_MEMBASE_REG:
2552                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2553                         break;
2554                 case OP_LOADR8_SPILL_MEMBASE:
2555                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2556                         x86_fxch (code, 1);
2557                         break;
2558                 case OP_LOADR8_MEMBASE:
2559                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2560                         break;
2561                 case OP_STORER4_MEMBASE_REG:
2562                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2563                         break;
2564                 case OP_LOADR4_MEMBASE:
2565                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2566                         break;
2567                 case CEE_CONV_R4: /* FIXME: change precision */
2568                 case CEE_CONV_R8:
2569                         x86_push_reg (code, ins->sreg1);
2570                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2571                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2572                         break;
2573                 case OP_X86_FP_LOAD_I8:
2574                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2575                         break;
2576                 case OP_X86_FP_LOAD_I4:
2577                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2578                         break;
2579                 case OP_FCONV_TO_I1:
2580                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2581                         break;
2582                 case OP_FCONV_TO_U1:
2583                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2584                         break;
2585                 case OP_FCONV_TO_I2:
2586                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2587                         break;
2588                 case OP_FCONV_TO_U2:
2589                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2590                         break;
2591                 case OP_FCONV_TO_I4:
2592                 case OP_FCONV_TO_I:
2593                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2594                         break;
2595                 case OP_FCONV_TO_I8:
2596                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2597                         x86_fnstcw_membase(code, X86_ESP, 0);
2598                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2599                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2600                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2601                         x86_fldcw_membase (code, X86_ESP, 2);
2602                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2603                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2604                         x86_pop_reg (code, ins->dreg);
2605                         x86_pop_reg (code, ins->unused);
2606                         x86_fldcw_membase (code, X86_ESP, 0);
2607                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2608                         break;
2609                 case OP_LCONV_TO_R_UN: { 
2610                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2611                         guint8 *br;
2612
2613                         /* load 64bit integer to FP stack */
2614                         x86_push_imm (code, 0);
2615                         x86_push_reg (code, ins->sreg2);
2616                         x86_push_reg (code, ins->sreg1);
2617                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2618                         /* store as 80bit FP value */
2619                         x86_fst80_membase (code, X86_ESP, 0);
2620                         
2621                         /* test if lreg is negative */
2622                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2623                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2624         
2625                         /* add correction constant mn */
2626                         x86_fld80_mem (code, mn);
2627                         x86_fld80_membase (code, X86_ESP, 0);
2628                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2629                         x86_fst80_membase (code, X86_ESP, 0);
2630
2631                         x86_patch (br, code);
2632
2633                         x86_fld80_membase (code, X86_ESP, 0);
2634                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2635
2636                         break;
2637                 }
2638                 case OP_LCONV_TO_OVF_I: {
2639                         guint8 *br [3], *label [1];
2640                         MonoInst *tins;
2641
2642                         /* 
2643                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2644                          */
2645                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2646
2647                         /* If the low word top bit is set, see if we are negative */
2648                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2649                         /* We are not negative (no top bit set), check for our top word to be zero */
2650                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2651                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2652                         label [0] = code;
2653
2654                         /* throw exception */
2655                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2656                         if (tins) {
2657                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2658                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2659                                         x86_jump8 (code, 0);
2660                                 else
2661                                         x86_jump32 (code, 0);
2662                         } else {
2663                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2664                                 x86_jump32 (code, 0);
2665                         }
2666         
2667         
2668                         x86_patch (br [0], code);
2669                         /* our top bit is set, check that top word is 0xffffffff */
2670                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2671                 
2672                         x86_patch (br [1], code);
2673                         /* nope, emit exception */
2674                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2675                         x86_patch (br [2], label [0]);
2676
2677                         if (ins->dreg != ins->sreg1)
2678                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2679                         break;
2680                 }
2681                 case OP_FADD:
2682                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2683                         break;
2684                 case OP_FSUB:
2685                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2686                         break;          
2687                 case OP_FMUL:
2688                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2689                         break;          
2690                 case OP_FDIV:
2691                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2692                         break;          
2693                 case OP_FNEG:
2694                         x86_fchs (code);
2695                         break;          
2696                 case OP_SIN:
2697                         x86_fsin (code);
2698                         x86_fldz (code);
2699                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2700                         break;          
2701                 case OP_COS:
2702                         x86_fcos (code);
2703                         x86_fldz (code);
2704                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2705                         break;          
2706                 case OP_ABS:
2707                         x86_fabs (code);
2708                         break;          
2709                 case OP_TAN: {
2710                         /* 
2711                          * it really doesn't make sense to inline all this code,
2712                          * it's here just to show that things may not be as simple 
2713                          * as they appear.
2714                          */
2715                         guchar *check_pos, *end_tan, *pop_jump;
2716                         x86_push_reg (code, X86_EAX);
2717                         x86_fptan (code);
2718                         x86_fnstsw (code);
2719                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2720                         check_pos = code;
2721                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2722                         x86_fstp (code, 0); /* pop the 1.0 */
2723                         end_tan = code;
2724                         x86_jump8 (code, 0);
2725                         x86_fldpi (code);
2726                         x86_fp_op (code, X86_FADD, 0);
2727                         x86_fxch (code, 1);
2728                         x86_fprem1 (code);
2729                         x86_fstsw (code);
2730                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2731                         pop_jump = code;
2732                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2733                         x86_fstp (code, 1);
2734                         x86_fptan (code);
2735                         x86_patch (pop_jump, code);
2736                         x86_fstp (code, 0); /* pop the 1.0 */
2737                         x86_patch (check_pos, code);
2738                         x86_patch (end_tan, code);
2739                         x86_fldz (code);
2740                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2741                         x86_pop_reg (code, X86_EAX);
2742                         break;
2743                 }
2744                 case OP_ATAN:
2745                         x86_fld1 (code);
2746                         x86_fpatan (code);
2747                         x86_fldz (code);
2748                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2749                         break;          
2750                 case OP_SQRT:
2751                         x86_fsqrt (code);
2752                         break;          
2753                 case OP_X86_FPOP:
2754                         x86_fstp (code, 0);
2755                         break;          
2756                 case OP_FREM: {
2757                         guint8 *l1, *l2;
2758
2759                         x86_push_reg (code, X86_EAX);
2760                         /* we need to exchange ST(0) with ST(1) */
2761                         x86_fxch (code, 1);
2762
2763                         /* this requires a loop, because fprem sometimes 
2764                          * returns a partial remainder */
2765                         l1 = code;
2766                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2767                         /* x86_fprem1 (code); */
2768                         x86_fprem (code);
2769                         x86_fnstsw (code);
2770                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2771                         l2 = code + 2;
2772                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2773
2774                         /* pop result */
2775                         x86_fstp (code, 1);
2776
2777                         x86_pop_reg (code, X86_EAX);
2778                         break;
2779                 }
2780                 case OP_FCOMPARE:
2781                         if (cfg->opt & MONO_OPT_FCMOV) {
2782                                 x86_fcomip (code, 1);
2783                                 x86_fstp (code, 0);
2784                                 break;
2785                         }
2786                         /* this overwrites EAX */
2787                         EMIT_FPCOMPARE(code);
2788                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2789                         break;
2790                 case OP_FCEQ:
2791                         if (cfg->opt & MONO_OPT_FCMOV) {
2792                                 /* zeroing the register at the start results in 
2793                                  * shorter and faster code (we can also remove the widening op)
2794                                  */
2795                                 guchar *unordered_check;
2796                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2797                                 x86_fcomip (code, 1);
2798                                 x86_fstp (code, 0);
2799                                 unordered_check = code;
2800                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2801                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2802                                 x86_patch (unordered_check, code);
2803                                 break;
2804                         }
2805                         if (ins->dreg != X86_EAX) 
2806                                 x86_push_reg (code, X86_EAX);
2807
2808                         EMIT_FPCOMPARE(code);
2809                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2810                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2811                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2812                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2813
2814                         if (ins->dreg != X86_EAX) 
2815                                 x86_pop_reg (code, X86_EAX);
2816                         break;
2817                 case OP_FCLT:
2818                 case OP_FCLT_UN:
2819                         if (cfg->opt & MONO_OPT_FCMOV) {
2820                                 /* zeroing the register at the start results in 
2821                                  * shorter and faster code (we can also remove the widening op)
2822                                  */
2823                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2824                                 x86_fcomip (code, 1);
2825                                 x86_fstp (code, 0);
2826                                 if (ins->opcode == OP_FCLT_UN) {
2827                                         guchar *unordered_check = code;
2828                                         guchar *jump_to_end;
2829                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2830                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2831                                         jump_to_end = code;
2832                                         x86_jump8 (code, 0);
2833                                         x86_patch (unordered_check, code);
2834                                         x86_inc_reg (code, ins->dreg);
2835                                         x86_patch (jump_to_end, code);
2836                                 } else {
2837                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2838                                 }
2839                                 break;
2840                         }
2841                         if (ins->dreg != X86_EAX) 
2842                                 x86_push_reg (code, X86_EAX);
2843
2844                         EMIT_FPCOMPARE(code);
2845                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2846                         if (ins->opcode == OP_FCLT_UN) {
2847                                 guchar *is_not_zero_check, *end_jump;
2848                                 is_not_zero_check = code;
2849                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2850                                 end_jump = code;
2851                                 x86_jump8 (code, 0);
2852                                 x86_patch (is_not_zero_check, code);
2853                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2854
2855                                 x86_patch (end_jump, code);
2856                         }
2857                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2858                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2859
2860                         if (ins->dreg != X86_EAX) 
2861                                 x86_pop_reg (code, X86_EAX);
2862                         break;
2863                 case OP_FCGT:
2864                 case OP_FCGT_UN:
2865                         if (cfg->opt & MONO_OPT_FCMOV) {
2866                                 /* zeroing the register at the start results in 
2867                                  * shorter and faster code (we can also remove the widening op)
2868                                  */
2869                                 guchar *unordered_check;
2870                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2871                                 x86_fcomip (code, 1);
2872                                 x86_fstp (code, 0);
2873                                 if (ins->opcode == OP_FCGT) {
2874                                         unordered_check = code;
2875                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2876                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2877                                         x86_patch (unordered_check, code);
2878                                 } else {
2879                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2880                                 }
2881                                 break;
2882                         }
2883                         if (ins->dreg != X86_EAX) 
2884                                 x86_push_reg (code, X86_EAX);
2885
2886                         EMIT_FPCOMPARE(code);
2887                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2888                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2889                         if (ins->opcode == OP_FCGT_UN) {
2890                                 guchar *is_not_zero_check, *end_jump;
2891                                 is_not_zero_check = code;
2892                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2893                                 end_jump = code;
2894                                 x86_jump8 (code, 0);
2895                                 x86_patch (is_not_zero_check, code);
2896                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2897         
2898                                 x86_patch (end_jump, code);
2899                         }
2900                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2901                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2902
2903                         if (ins->dreg != X86_EAX) 
2904                                 x86_pop_reg (code, X86_EAX);
2905                         break;
2906                 case OP_FBEQ:
2907                         if (cfg->opt & MONO_OPT_FCMOV) {
2908                                 guchar *jump = code;
2909                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2910                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2911                                 x86_patch (jump, code);
2912                                 break;
2913                         }
2914                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2915                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2916                         break;
2917                 case OP_FBNE_UN:
2918                         /* Branch if C013 != 100 */
2919                         if (cfg->opt & MONO_OPT_FCMOV) {
2920                                 /* branch if !ZF or (PF|CF) */
2921                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2922                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2923                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2924                                 break;
2925                         }
2926                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2927                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2928                         break;
2929                 case OP_FBLT:
2930                         if (cfg->opt & MONO_OPT_FCMOV) {
2931                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2932                                 break;
2933                         }
2934                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2935                         break;
2936                 case OP_FBLT_UN:
2937                         if (cfg->opt & MONO_OPT_FCMOV) {
2938                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2939                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2940                                 break;
2941                         }
2942                         if (ins->opcode == OP_FBLT_UN) {
2943                                 guchar *is_not_zero_check, *end_jump;
2944                                 is_not_zero_check = code;
2945                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2946                                 end_jump = code;
2947                                 x86_jump8 (code, 0);
2948                                 x86_patch (is_not_zero_check, code);
2949                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2950
2951                                 x86_patch (end_jump, code);
2952                         }
2953                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2954                         break;
2955                 case OP_FBGT:
2956                 case OP_FBGT_UN:
2957                         if (cfg->opt & MONO_OPT_FCMOV) {
2958                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2959                                 break;
2960                         }
2961                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2962                         if (ins->opcode == OP_FBGT_UN) {
2963                                 guchar *is_not_zero_check, *end_jump;
2964                                 is_not_zero_check = code;
2965                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2966                                 end_jump = code;
2967                                 x86_jump8 (code, 0);
2968                                 x86_patch (is_not_zero_check, code);
2969                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2970
2971                                 x86_patch (end_jump, code);
2972                         }
2973                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2974                         break;
2975                 case OP_FBGE:
2976                         /* Branch if C013 == 100 or 001 */
2977                         if (cfg->opt & MONO_OPT_FCMOV) {
2978                                 guchar *br1;
2979
2980                                 /* skip branch if C1=1 */
2981                                 br1 = code;
2982                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2983                                 /* branch if (C0 | C3) = 1 */
2984                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
2985                                 x86_patch (br1, code);
2986                                 break;
2987                         }
2988                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2989                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2990                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2991                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2992                         break;
2993                 case OP_FBGE_UN:
2994                         /* Branch if C013 == 000 */
2995                         if (cfg->opt & MONO_OPT_FCMOV) {
2996                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
2997                                 break;
2998                         }
2999                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3000                         break;
3001                 case OP_FBLE:
3002                         /* Branch if C013=000 or 100 */
3003                         if (cfg->opt & MONO_OPT_FCMOV) {
3004                                 guchar *br1;
3005
3006                                 /* skip branch if C1=1 */
3007                                 br1 = code;
3008                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3009                                 /* branch if C0=0 */
3010                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3011                                 x86_patch (br1, code);
3012                                 break;
3013                         }
3014                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3015                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3016                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3017                         break;
3018                 case OP_FBLE_UN:
3019                         /* Branch if C013 != 001 */
3020                         if (cfg->opt & MONO_OPT_FCMOV) {
3021                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3022                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3023                                 break;
3024                         }
3025                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3026                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3027                         break;
3028                 case CEE_CKFINITE: {
3029                         x86_push_reg (code, X86_EAX);
3030                         x86_fxam (code);
3031                         x86_fnstsw (code);
3032                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3033                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3034                         x86_pop_reg (code, X86_EAX);
3035                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3036                         break;
3037                 }
3038                 case OP_TLS_GET: {
3039                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3040                         break;
3041                 }
3042                 case OP_MEMORY_BARRIER: {
3043                         /* Not needed on x86 */
3044                         break;
3045                 }
3046                 case OP_ATOMIC_ADD_I4: {
3047                         int dreg = ins->dreg;
3048
3049                         if (dreg == ins->inst_basereg) {
3050                                 x86_push_reg (code, ins->sreg2);
3051                                 dreg = ins->sreg2;
3052                         } 
3053                         
3054                         if (dreg != ins->sreg2)
3055                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3056
3057                         x86_prefix (code, X86_LOCK_PREFIX);
3058                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3059
3060                         if (dreg != ins->dreg) {
3061                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3062                                 x86_pop_reg (code, dreg);
3063                         }
3064
3065                         break;
3066                 }
3067                 case OP_ATOMIC_ADD_NEW_I4: {
3068                         int dreg = ins->dreg;
3069
3070                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3071                         if (ins->sreg2 == dreg) {
3072                                 if (dreg == X86_EBX) {
3073                                         dreg = X86_EDI;
3074                                         if (ins->inst_basereg == X86_EDI)
3075                                                 dreg = X86_ESI;
3076                                 } else {
3077                                         dreg = X86_EBX;
3078                                         if (ins->inst_basereg == X86_EBX)
3079                                                 dreg = X86_EDI;
3080                                 }
3081                         } else if (ins->inst_basereg == dreg) {
3082                                 if (dreg == X86_EBX) {
3083                                         dreg = X86_EDI;
3084                                         if (ins->sreg2 == X86_EDI)
3085                                                 dreg = X86_ESI;
3086                                 } else {
3087                                         dreg = X86_EBX;
3088                                         if (ins->sreg2 == X86_EBX)
3089                                                 dreg = X86_EDI;
3090                                 }
3091                         }
3092
3093                         if (dreg != ins->dreg) {
3094                                 x86_push_reg (code, dreg);
3095                         }
3096
3097                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3098                         x86_prefix (code, X86_LOCK_PREFIX);
3099                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3100                         /* dreg contains the old value, add with sreg2 value */
3101                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3102                         
3103                         if (ins->dreg != dreg) {
3104                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3105                                 x86_pop_reg (code, dreg);
3106                         }
3107
3108                         break;
3109                 }
3110                 case OP_ATOMIC_EXCHANGE_I4: {
3111                         guchar *br[2];
3112                         int sreg2 = ins->sreg2;
3113                         int breg = ins->inst_basereg;
3114
3115                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3116                          * hack to overcome limits in x86 reg allocator 
3117                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3118                          */
3119                         if (ins->dreg != X86_EAX)
3120                                 x86_push_reg (code, X86_EAX);
3121                         
3122                         /* We need the EAX reg for the cmpxchg */
3123                         if (ins->sreg2 == X86_EAX) {
3124                                 x86_push_reg (code, X86_EDX);
3125                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3126                                 sreg2 = X86_EDX;
3127                         }
3128
3129                         if (breg == X86_EAX) {
3130                                 x86_push_reg (code, X86_ESI);
3131                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3132                                 breg = X86_ESI;
3133                         }
3134
3135                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3136
3137                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3138                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3139                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3140                         x86_patch (br [1], br [0]);
3141
3142                         if (breg != ins->inst_basereg)
3143                                 x86_pop_reg (code, X86_ESI);
3144
3145                         if (ins->dreg != X86_EAX) {
3146                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3147                                 x86_pop_reg (code, X86_EAX);
3148                         }
3149
3150                         if (ins->sreg2 != sreg2)
3151                                 x86_pop_reg (code, X86_EDX);
3152
3153                         break;
3154                 }
3155                 default:
3156                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3157                         g_assert_not_reached ();
3158                 }
3159
3160                 if ((code - cfg->native_code - offset) > max_len) {
3161                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3162                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3163                         g_assert_not_reached ();
3164                 }
3165                
3166                 cpos += max_len;
3167
3168                 last_ins = ins;
3169                 last_offset = offset;
3170                 
3171                 ins = ins->next;
3172         }
3173
3174         cfg->code_len = code - cfg->native_code;
3175 }
3176
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Intentionally empty in this backend: the function takes no arguments,
 * returns nothing and performs no work.
 * NOTE(review): presumably a per-architecture hook that the generic JIT
 * code expects every backend to provide -- confirm against the callers.
 */
3177 void
3178 mono_arch_register_lowlevel_calls (void)
3179 {
3180 }
3181
3182 void
3183 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3184 {
3185         MonoJumpInfo *patch_info;
3186         gboolean compile_aot = !run_cctors;
3187
3188         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3189                 unsigned char *ip = patch_info->ip.i + code;
3190                 const unsigned char *target;
3191
3192                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3193
3194                 if (compile_aot) {
3195                         switch (patch_info->type) {
3196                         case MONO_PATCH_INFO_BB:
3197                         case MONO_PATCH_INFO_LABEL:
3198                                 break;
3199                         default:
3200                                 /* No need to patch these */
3201                                 continue;
3202                         }
3203                 }
3204
3205                 switch (patch_info->type) {
3206                 case MONO_PATCH_INFO_IP:
3207                         *((gconstpointer *)(ip)) = target;
3208                         break;
3209                 case MONO_PATCH_INFO_CLASS_INIT: {
3210                         guint8 *code = ip;
3211                         /* Might already been changed to a nop */
3212                         x86_call_code (code, 0);
3213                         x86_patch (ip, target);
3214                         break;
3215                 }
3216                 case MONO_PATCH_INFO_ABS:
3217                 case MONO_PATCH_INFO_METHOD:
3218                 case MONO_PATCH_INFO_METHOD_JUMP:
3219                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3220                 case MONO_PATCH_INFO_BB:
3221                 case MONO_PATCH_INFO_LABEL:
3222                         x86_patch (ip, target);
3223                         break;
3224                 case MONO_PATCH_INFO_NONE:
3225                         break;
3226                 default: {
3227                         guint32 offset = mono_arch_get_patch_offset (ip);
3228                         *((gconstpointer *)(ip + offset)) = target;
3229                         break;
3230                 }
3231                 }
3232         }
3233 }
3234
3235 guint8 *
3236 mono_arch_emit_prolog (MonoCompile *cfg)
3237 {
3238         MonoMethod *method = cfg->method;
3239         MonoBasicBlock *bb;
3240         MonoMethodSignature *sig;
3241         MonoInst *inst;
3242         int alloc_size, pos, max_offset, i;
3243         guint8 *code;
3244
3245         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3246         code = cfg->native_code = g_malloc (cfg->code_size);
3247
3248         x86_push_reg (code, X86_EBP);
3249         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3250
3251         alloc_size = cfg->stack_offset;
3252         pos = 0;
3253
3254         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3255                 /* Might need to attach the thread to the JIT */
3256                 if (lmf_tls_offset != -1) {
3257                         guint8 *buf;
3258
3259                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3260                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3261                         buf = code;
3262                         x86_branch8 (code, X86_CC_NE, 0, 0);
3263                         x86_push_imm (code, cfg->domain);
3264                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3265                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3266                         x86_patch (buf, code);
3267 #ifdef PLATFORM_WIN32
3268                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3269                         /* FIXME: Add a separate key for LMF to avoid this */
3270                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3271 #endif
3272                 } else {
3273                         g_assert (!cfg->compile_aot);
3274                         x86_push_imm (code, cfg->domain);
3275                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3276                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3277                 }
3278         }
3279
3280         if (method->save_lmf) {
3281                 pos += sizeof (MonoLMF);
3282
3283                 /* save the current IP */
3284                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3285                 x86_push_imm_template (code);
3286
3287                 /* save all callee saved regs */
3288                 x86_push_reg (code, X86_EBP);
3289                 x86_push_reg (code, X86_ESI);
3290                 x86_push_reg (code, X86_EDI);
3291                 x86_push_reg (code, X86_EBX);
3292
3293                 /* save method info */
3294                 x86_push_imm (code, method);
3295
3296                 /* get the address of lmf for the current thread */
3297                 /* 
3298                  * This is performance critical so we try to use some tricks to make
3299                  * it fast.
3300                  */
3301                 if (lmf_tls_offset != -1) {
3302                         /* Load lmf quickly using the GS register */
3303                         code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
3304 #ifdef PLATFORM_WIN32
3305                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3306                         /* FIXME: Add a separate key for LMF to avoid this */
3307                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3308 #endif
3309                 } else {
3310                         if (cfg->compile_aot) {
3311                                 /* The GOT var does not exist yet */
3312                                 x86_call_imm (code, 0);
3313                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3314                                 x86_pop_reg (code, X86_EAX);
3315                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
3316                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3317                                 x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
3318                         } else {
3319                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3320                         }
3321                 }
3322
3323                 /* push lmf */
3324                 x86_push_reg (code, X86_EAX); 
3325                 /* push *lmf (previous_lmf) */
3326                 x86_push_membase (code, X86_EAX, 0);
3327                 /* *(lmf) = ESP */
3328                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3329         } else {
3330
3331                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3332                         x86_push_reg (code, X86_EBX);
3333                         pos += 4;
3334                 }
3335
3336                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3337                         x86_push_reg (code, X86_EDI);
3338                         pos += 4;
3339                 }
3340
3341                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3342                         x86_push_reg (code, X86_ESI);
3343                         pos += 4;
3344                 }
3345         }
3346
3347         alloc_size -= pos;
3348
3349 #if __APPLE__
3350         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3351         {
3352                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3353                 if (tot & 4) {
3354                         tot += 4;
3355                         alloc_size += 4;
3356                 }
3357                 if (tot & 8) {
3358                         alloc_size += 8;
3359                 }
3360         }
3361 #endif
3362
3363         if (alloc_size) {
3364                 /* See mono_emit_stack_alloc */
3365 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3366                 guint32 remaining_size = alloc_size;
3367                 while (remaining_size >= 0x1000) {
3368                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3369                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3370                         remaining_size -= 0x1000;
3371                 }
3372                 if (remaining_size)
3373                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3374 #else
3375                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3376 #endif
3377         }
3378
3379 #if __APPLE_
3380         /* check the stack is aligned */
3381         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3382         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3383         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3384         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3385         x86_breakpoint (code);
3386 #endif
3387
3388         /* compute max_offset in order to use short forward jumps */
3389         max_offset = 0;
3390         if (cfg->opt & MONO_OPT_BRANCH) {
3391                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3392                         MonoInst *ins = bb->code;
3393                         bb->max_offset = max_offset;
3394
3395                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3396                                 max_offset += 6;
3397                         /* max alignment for loops */
3398                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3399                                 max_offset += LOOP_ALIGNMENT;
3400
3401                         while (ins) {
3402                                 if (ins->opcode == OP_LABEL)
3403                                         ins->inst_c1 = max_offset;
3404                                 
3405                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3406                                 ins = ins->next;
3407                         }
3408                 }
3409         }
3410
3411         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3412                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3413
3414         /* load arguments allocated to register from the stack */
3415         sig = mono_method_signature (method);
3416         pos = 0;
3417
3418         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3419                 inst = cfg->varinfo [pos];
3420                 if (inst->opcode == OP_REGVAR) {
3421                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3422                         if (cfg->verbose_level > 2)
3423                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3424                 }
3425                 pos++;
3426         }
3427
3428         cfg->code_len = code - cfg->native_code;
3429
3430         return code;
3431 }
3432
3433 void
3434 mono_arch_emit_epilog (MonoCompile *cfg)
3435 {
3436         MonoMethod *method = cfg->method;
3437         MonoMethodSignature *sig = mono_method_signature (method);
3438         int quad, pos;
3439         guint32 stack_to_pop;
3440         guint8 *code;
3441         int max_epilog_size = 16;
3442         CallInfo *cinfo;
3443         
3444         if (cfg->method->save_lmf)
3445                 max_epilog_size += 128;
3446         
3447         if (mono_jit_trace_calls != NULL)
3448                 max_epilog_size += 50;
3449
3450         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3451                 cfg->code_size *= 2;
3452                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3453                 mono_jit_stats.code_reallocs++;
3454         }
3455
3456         code = cfg->native_code + cfg->code_len;
3457
3458         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3459                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3460
3461         /* the code restoring the registers must be kept in sync with CEE_JMP */
3462         pos = 0;
3463         
3464         if (method->save_lmf) {
3465                 gint32 prev_lmf_reg;
3466                 gint32 lmf_offset = -sizeof (MonoLMF);
3467
3468                 /* Find a spare register */
3469                 switch (sig->ret->type) {
3470                 case MONO_TYPE_I8:
3471                 case MONO_TYPE_U8:
3472                         prev_lmf_reg = X86_EDI;
3473                         cfg->used_int_regs |= (1 << X86_EDI);
3474                         break;
3475                 default:
3476                         prev_lmf_reg = X86_EDX;
3477                         break;
3478                 }
3479
3480                 /* reg = previous_lmf */
3481                 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3482
3483                 /* ecx = lmf */
3484                 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
3485
3486                 /* *(lmf) = previous_lmf */
3487                 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3488
3489                 /* restore caller saved regs */
3490                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3491                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
3492                 }
3493
3494                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3495                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
3496                 }
3497                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3498                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
3499                 }
3500
3501                 /* EBP is restored by LEAVE */
3502         } else {
3503                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3504                         pos -= 4;
3505                 }
3506                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3507                         pos -= 4;
3508                 }
3509                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3510                         pos -= 4;
3511                 }
3512
3513                 if (pos)
3514                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3515
3516                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3517                         x86_pop_reg (code, X86_ESI);
3518                 }
3519                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3520                         x86_pop_reg (code, X86_EDI);
3521                 }
3522                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3523                         x86_pop_reg (code, X86_EBX);
3524                 }
3525         }
3526
3527         /* Load returned vtypes into registers if needed */
3528         cinfo = get_call_info (sig, FALSE);
3529         if (cinfo->ret.storage == ArgValuetypeInReg) {
3530                 for (quad = 0; quad < 2; quad ++) {
3531                         switch (cinfo->ret.pair_storage [quad]) {
3532                         case ArgInIReg:
3533                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
3534                                 break;
3535                         case ArgOnFloatFpStack:
3536                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
3537                                 break;
3538                         case ArgOnDoubleFpStack:
3539                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
3540                                 break;
3541                         case ArgNone:
3542                                 break;
3543                         default:
3544                                 g_assert_not_reached ();
3545                         }
3546                 }
3547         }
3548
3549         x86_leave (code);
3550
3551         if (CALLCONV_IS_STDCALL (sig)) {
3552                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3553
3554                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3555         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
3556                 stack_to_pop = 4;
3557         else
3558                 stack_to_pop = 0;
3559
3560         if (stack_to_pop)
3561                 x86_ret_imm (code, stack_to_pop);
3562         else
3563                 x86_ret (code);
3564
3565         g_free (cinfo);
3566
3567         cfg->code_len = code - cfg->native_code;
3568
3569         g_assert (cfg->code_len < cfg->code_size);
3570 }
3571
3572 void
3573 mono_arch_emit_exceptions (MonoCompile *cfg)
3574 {
3575         MonoJumpInfo *patch_info;
3576         int nthrows, i;
3577         guint8 *code;
3578         MonoClass *exc_classes [16];
3579         guint8 *exc_throw_start [16], *exc_throw_end [16];
3580         guint32 code_size;
3581         int exc_count = 0;
3582
3583         /* Compute needed space */
3584         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3585                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3586                         exc_count++;
3587         }
3588
3589         /* 
3590          * make sure we have enough space for exceptions
3591          * 16 is the size of two push_imm instructions and a call
3592          */
3593         if (cfg->compile_aot)
3594                 code_size = exc_count * 32;
3595         else
3596                 code_size = exc_count * 16;
3597
3598         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
3599                 cfg->code_size *= 2;
3600                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3601                 mono_jit_stats.code_reallocs++;
3602         }
3603
3604         code = cfg->native_code + cfg->code_len;
3605
3606         nthrows = 0;
3607         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3608                 switch (patch_info->type) {
3609                 case MONO_PATCH_INFO_EXC: {
3610                         MonoClass *exc_class;
3611                         guint8 *buf, *buf2;
3612                         guint32 throw_ip;
3613
3614                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3615
3616                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
3617                         g_assert (exc_class);
3618                         throw_ip = patch_info->ip.i;
3619
3620                         /* Find a throw sequence for the same exception class */
3621                         for (i = 0; i < nthrows; ++i)
3622                                 if (exc_classes [i] == exc_class)
3623                                         break;
3624                         if (i < nthrows) {
3625                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
3626                                 x86_jump_code (code, exc_throw_start [i]);
3627                                 patch_info->type = MONO_PATCH_INFO_NONE;
3628                         }
3629                         else {
3630                                 guint32 got_reg = X86_EAX;
3631                                 guint32 size;
3632
3633                                 /* Compute size of code following the push <OFFSET> */
3634                                 if (cfg->compile_aot) {
3635                                         size = 5 + 6;
3636                                         if (!cfg->got_var)
3637                                                 size += 32;
3638                                         else if (cfg->got_var->opcode == OP_REGOFFSET)
3639                                                 size += 6;
3640                                 }
3641                                 else
3642                                         size = 5 + 5;
3643
3644                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
3645                                         /* Use the shorter form */
3646                                         buf = buf2 = code;
3647                                         x86_push_imm (code, 0);
3648                                 }
3649                                 else {
3650                                         buf = code;
3651                                         x86_push_imm (code, 0xf0f0f0f0);
3652                                         buf2 = code;
3653                                 }
3654
3655                                 if (nthrows < 16) {
3656                                         exc_classes [nthrows] = exc_class;
3657                                         exc_throw_start [nthrows] = code;
3658                                 }
3659
3660                                 if (cfg->compile_aot) {          
3661                                         /*
3662                                          * Since the patches are generated by the back end, there is                                     * no way to generate a got_var at this point.   
3663                                          */
3664                                         if (!cfg->got_var) {
3665                                                 x86_call_imm (code, 0);
3666                                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3667                                                 x86_pop_reg (code, X86_EAX);
3668                                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
3669                                         }
3670                                         else {
3671                                                 if (cfg->got_var->opcode == OP_REGOFFSET)
3672                                                         x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
3673                                                 else
3674                                                         got_reg = cfg->got_var->dreg;
3675                                         }
3676                                 }
3677
3678                                 x86_push_imm (code, exc_class->type_token);
3679                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
3680                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
3681                                 patch_info->ip.i = code - cfg->native_code;
3682                                 if (cfg->compile_aot)
3683                                         x86_call_membase (code, got_reg, 0xf0f0f0f0);
3684                                 else
3685                                         x86_call_code (code, 0);
3686                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
3687                                 while (buf < buf2)
3688                                         x86_nop (buf);
3689
3690                                 if (nthrows < 16) {
3691                                         exc_throw_end [nthrows] = code;
3692                                         nthrows ++;
3693                                 }
3694                         }
3695                         break;
3696                 }
3697                 default:
3698                         /* do nothing */
3699                         break;
3700                 }
3701         }
3702
3703         cfg->code_len = code - cfg->native_code;
3704
3705         g_assert (cfg->code_len < cfg->code_size);
3706 }
3707
3708 void
3709 mono_arch_flush_icache (guint8 *code, gint size)
3710 {
3711         /* not needed */
3712 }
3713
/*
 * mono_arch_flush_register_windows:
 *
 *   Intentionally empty in this backend.
 */
void
mono_arch_flush_register_windows (void)
{
	/* nothing to do */
}
3718
3719 /*
3720  * Support for fast access to the thread-local lmf structure using the GS
3721  * segment register on NPTL + kernel 2.6.x.
3722  */
3723
3724 static gboolean tls_offset_inited = FALSE;
3725
3726 void
3727 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
3728 {
3729         if (!tls_offset_inited) {
3730                 if (!getenv ("MONO_NO_TLS")) {
3731 #ifdef PLATFORM_WIN32
3732                         /* 
3733                          * We need to init this multiple times, since when we are first called, the key might not
3734                          * be initialized yet.
3735                          */
3736                         appdomain_tls_offset = mono_domain_get_tls_key ();
3737                         lmf_tls_offset = mono_get_jit_tls_key ();
3738                         thread_tls_offset = mono_thread_get_tls_key ();
3739
3740                         /* Only 64 tls entries can be accessed using inline code */
3741                         if (appdomain_tls_offset >= 64)
3742                                 appdomain_tls_offset = -1;
3743                         if (lmf_tls_offset >= 64)
3744                                 lmf_tls_offset = -1;
3745                         if (thread_tls_offset >= 64)
3746                                 thread_tls_offset = -1;
3747 #else
3748 #if MONO_XEN_OPT
3749                         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
3750 #endif
3751                         tls_offset_inited = TRUE;
3752                         appdomain_tls_offset = mono_domain_get_tls_offset ();
3753                         lmf_tls_offset = mono_get_lmf_tls_offset ();
3754                         thread_tls_offset = mono_thread_get_tls_offset ();
3755 #endif
3756                 }
3757         }               
3758 }
3759
3760 void
3761 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
3762 {
3763 }
3764
3765 void
3766 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3767 {
3768         MonoCallInst *call = (MonoCallInst*)inst;
3769         CallInfo *cinfo = get_call_info (inst->signature, FALSE);
3770
3771         /* add the this argument */
3772         if (this_reg != -1) {
3773                 if (cinfo->args [0].storage == ArgInIReg) {
3774                         MonoInst *this;
3775                         MONO_INST_NEW (cfg, this, OP_MOVE);
3776                         this->type = this_type;
3777                         this->sreg1 = this_reg;
3778                         this->dreg = mono_regstate_next_int (cfg->rs);
3779                         mono_bblock_add_inst (cfg->cbb, this);
3780
3781                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
3782                 }
3783                 else {
3784                         MonoInst *this;
3785                         MONO_INST_NEW (cfg, this, OP_OUTARG);
3786                         this->type = this_type;
3787                         this->sreg1 = this_reg;
3788                         mono_bblock_add_inst (cfg->cbb, this);
3789                 }
3790         }
3791
3792         if (vt_reg != -1) {
3793                 MonoInst *vtarg;
3794
3795                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3796                         /*
3797                          * The valuetype is in EAX:EDX after the call, needs to be copied to
3798                          * the stack. Save the address here, so the call instruction can
3799                          * access it.
3800                          */
3801                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
3802                         vtarg->inst_destbasereg = X86_ESP;
3803                         vtarg->inst_offset = inst->stack_usage;
3804                         vtarg->sreg1 = vt_reg;
3805                         mono_bblock_add_inst (cfg->cbb, vtarg);
3806                 }
3807                 else if (cinfo->ret.storage == ArgInIReg) {
3808                         /* The return address is passed in a register */
3809                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
3810                         vtarg->sreg1 = vt_reg;
3811                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
3812                         mono_bblock_add_inst (cfg->cbb, vtarg);
3813
3814                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
3815                 } else {
3816                         MonoInst *vtarg;
3817                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3818                         vtarg->type = STACK_MP;
3819                         vtarg->sreg1 = vt_reg;
3820                         mono_bblock_add_inst (cfg->cbb, vtarg);
3821                 }
3822         }
3823
3824         g_free (cinfo);
3825 }
3826
3827 MonoInst*
3828 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3829 {
3830         MonoInst *ins = NULL;
3831
3832         if (cmethod->klass == mono_defaults.math_class) {
3833                 if (strcmp (cmethod->name, "Sin") == 0) {
3834                         MONO_INST_NEW (cfg, ins, OP_SIN);
3835                         ins->inst_i0 = args [0];
3836                 } else if (strcmp (cmethod->name, "Cos") == 0) {
3837                         MONO_INST_NEW (cfg, ins, OP_COS);
3838                         ins->inst_i0 = args [0];
3839                 } else if (strcmp (cmethod->name, "Tan") == 0) {
3840                         MONO_INST_NEW (cfg, ins, OP_TAN);
3841                         ins->inst_i0 = args [0];
3842                 } else if (strcmp (cmethod->name, "Atan") == 0) {
3843                         MONO_INST_NEW (cfg, ins, OP_ATAN);
3844                         ins->inst_i0 = args [0];
3845                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
3846                         MONO_INST_NEW (cfg, ins, OP_SQRT);
3847                         ins->inst_i0 = args [0];
3848                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
3849                         MONO_INST_NEW (cfg, ins, OP_ABS);
3850                         ins->inst_i0 = args [0];
3851                 }
3852 #if 0
3853                 /* OP_FREM is not IEEE compatible */
3854                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
3855                         MONO_INST_NEW (cfg, ins, OP_FREM);
3856                         ins->inst_i0 = args [0];
3857                         ins->inst_i1 = args [1];
3858                 }
3859 #endif
3860         } else if (cmethod->klass == mono_defaults.thread_class &&
3861                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
3862                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
3863         } else if(cmethod->klass->image == mono_defaults.corlib &&
3864                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
3865                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
3866
3867                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3868                         MonoInst *ins_iconst;
3869
3870                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3871                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3872                         ins_iconst->inst_c0 = 1;
3873
3874                         ins->inst_i0 = args [0];
3875                         ins->inst_i1 = ins_iconst;
3876                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3877                         MonoInst *ins_iconst;
3878
3879                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3880                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3881                         ins_iconst->inst_c0 = -1;
3882
3883                         ins->inst_i0 = args [0];
3884                         ins->inst_i1 = ins_iconst;
3885                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3886                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
3887
3888                         ins->inst_i0 = args [0];
3889                         ins->inst_i1 = args [1];
3890                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3891                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
3892
3893                         ins->inst_i0 = args [0];
3894                         ins->inst_i1 = args [1];
3895                 }
3896         }
3897
3898         return ins;
3899 }
3900
3901
3902 gboolean
3903 mono_arch_print_tree (MonoInst *tree, int arity)
3904 {
3905         return 0;
3906 }
3907
3908 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3909 {
3910         MonoInst* ins;
3911         
3912         if (appdomain_tls_offset == -1)
3913                 return NULL;
3914
3915         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3916         ins->inst_offset = appdomain_tls_offset;
3917         return ins;
3918 }
3919
3920 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3921 {
3922         MonoInst* ins;
3923
3924         if (thread_tls_offset == -1)
3925                 return NULL;
3926
3927         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3928         ins->inst_offset = thread_tls_offset;
3929         return ins;
3930 }
3931
3932 guint32
3933 mono_arch_get_patch_offset (guint8 *code)
3934 {
3935         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3936                 return 2;
3937         else if ((code [0] == 0xba))
3938                 return 1;
3939         else if ((code [0] == 0x68))
3940                 /* push IMM */
3941                 return 1;
3942         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3943                 /* push <OFFSET>(<REG>) */
3944                 return 2;
3945         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
3946                 /* call *<OFFSET>(<REG>) */
3947                 return 2;
3948         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
3949                 /* fldl <ADDR> */
3950                 return 2;
3951         else if ((code [0] == 0x58) && (code [1] == 0x05))
3952                 /* pop %eax; add <OFFSET>, %eax */
3953                 return 2;
3954         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
3955                 /* pop <REG>; add <OFFSET>, <REG> */
3956                 return 3;
3957         else {
3958                 g_assert_not_reached ();
3959                 return -1;
3960         }
3961 }
3962
3963 gpointer*
3964 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
3965 {
3966         guint8 reg = 0;
3967         gint32 disp = 0;
3968
3969         /* go to the start of the call instruction
3970          *
3971          * address_byte = (m << 6) | (o << 3) | reg
3972          * call opcode: 0xff address_byte displacement
3973          * 0xff m=1,o=2 imm8
3974          * 0xff m=2,o=2 imm32
3975          */
3976         code -= 6;
3977         if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
3978                 reg = code [4] & 0x07;
3979                 disp = (signed char)code [5];
3980         } else {
3981                 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
3982                         reg = code [1] & 0x07;
3983                         disp = *((gint32*)(code + 2));
3984                 } else if ((code [1] == 0xe8)) {
3985                         return NULL;
3986                 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
3987                         /*
3988                          * This is a interface call: should check the above code can't catch it earlier 
3989                          * 8b 40 30   mov    0x30(%eax),%eax
3990                          * ff 10      call   *(%eax)
3991                          */
3992                         disp = 0;
3993                         reg = code [5] & 0x07;
3994                 }
3995                 else
3996                         return NULL;
3997         }
3998
3999         return (gpointer*)(((gint32)(regs [reg])) + disp);
4000 }
4001
4002 gpointer* 
4003 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
4004 {
4005         guint8 reg = 0;
4006         gint32 disp = 0;
4007
4008         code -= 7;
4009         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
4010                 reg = x86_modrm_rm (code [1]);
4011                 disp = code [4];
4012
4013                 if (reg == X86_EAX)
4014                         return NULL;
4015                 else
4016                         return (gpointer*)(((gint32)(regs [reg])) + disp);
4017         }
4018
4019         return NULL;
4020 }