* roottypes.cs: Rename from tree.cs.
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #include <unistd.h>
15
16 #include <mono/metadata/appdomain.h>
17 #include <mono/metadata/debug-helpers.h>
18 #include <mono/metadata/threads.h>
19 #include <mono/metadata/profiler-private.h>
20 #include <mono/utils/mono-math.h>
21
22 #include "trace.h"
23 #include "mini-x86.h"
24 #include "inssel.h"
25 #include "cpu-pentium.h"
26
27 /* On windows, these hold the key returned by TlsAlloc () */
28 static gint lmf_tls_offset = -1;
29 static gint appdomain_tls_offset = -1;
30 static gint thread_tls_offset = -1;
31
32 #ifdef MONO_XEN_OPT
33 /* TRUE by default until we add runtime detection of Xen */
34 static gboolean optimize_for_xen = TRUE;
35 #else
36 #define optimize_for_xen 0
37 #endif
38
/*
 * Round VAL up to the next multiple of ALIGN.  The mask arithmetic assumes
 * ALIGN is a power of two.  VAL is parenthesized before the cast so that an
 * expression argument (e.g. "c ? a : b") is converted to guint64 as a whole
 * instead of only its first operand.
 */
#define ALIGN_TO(val,align) ((((guint64)(val)) + ((align) - 1)) & ~((align) - 1))

/* Added to the per-argument offsets when addressing arguments relative to the frame register */
#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

/* Marks code paths that have no implementation: aborts through glib */
#define NOT_IMPLEMENTED g_assert_not_reached ()
51
52 const char*
53 mono_arch_regname (int reg) {
54         switch (reg) {
55         case X86_EAX: return "%eax";
56         case X86_EBX: return "%ebx";
57         case X86_ECX: return "%ecx";
58         case X86_EDX: return "%edx";
59         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
60         case X86_EDI: return "%edi";
61         case X86_ESI: return "%esi";
62         }
63         return "unknown";
64 }
65
/*
 * mono_arch_fregname:
 * @reg: a floating point register number (ignored)
 *
 * No floating point register names are provided: always returns "unknown".
 */
const char*
mono_arch_fregname (int reg)
{
        static const char unnamed [] = "unknown";

        (void) reg;
        return unnamed;
}
70
/*
 * Where an argument or a return value is kept.  ArgInIReg is 0, so an
 * ArgInfo cleared by g_malloc0 starts out with that storage.
 */
typedef enum {
        ArgInIReg = 0,          /* integer register (ArgInfo.reg) */
        ArgInFloatSSEReg,       /* single precision value in a float register */
        ArgInDoubleSSEReg,      /* double precision value in a float register */
        ArgOnStack,             /* on the stack at ArgInfo.offset */
        ArgValuetypeInReg,      /* valuetype described by pair_storage/pair_regs */
        ArgOnFloatFpStack,      /* used for R4 return values */
        ArgOnDoubleFpStack,     /* used for R8 return values */
        ArgNone                 /* nothing to pass/return */
} ArgStorage;
81
82 typedef struct {
83         gint16 offset;
84         gint8  reg;
85         ArgStorage storage;
86
87         /* Only if storage == ArgValuetypeInReg */
88         ArgStorage pair_storage [2];
89         gint8 pair_regs [2];
90 } ArgInfo;
91
92 typedef struct {
93         int nargs;
94         guint32 stack_usage;
95         guint32 reg_usage;
96         guint32 freg_usage;
97         gboolean need_stack_align;
98         guint32 stack_align_amount;
99         ArgInfo ret;
100         ArgInfo sig_cookie;
101         ArgInfo args [1];
102 } CallInfo;
103
104 #define PARAM_REGS 0
105
106 #define FLOAT_PARAM_REGS 0
107
108 static X86_Reg_No param_regs [] = { 0 };
109
110 #ifdef PLATFORM_WIN32
111 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
112 #endif
113
114 static void inline
115 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
116 {
117     ainfo->offset = *stack_size;
118
119     if (*gr >= PARAM_REGS) {
120                 ainfo->storage = ArgOnStack;
121                 (*stack_size) += sizeof (gpointer);
122     }
123     else {
124                 ainfo->storage = ArgInIReg;
125                 ainfo->reg = param_regs [*gr];
126                 (*gr) ++;
127     }
128 }
129
130 static void inline
131 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
132 {
133         ainfo->offset = *stack_size;
134
135         g_assert (PARAM_REGS == 0);
136         
137         ainfo->storage = ArgOnStack;
138         (*stack_size) += sizeof (gpointer) * 2;
139 }
140
141 static void inline
142 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
143 {
144     ainfo->offset = *stack_size;
145
146     if (*gr >= FLOAT_PARAM_REGS) {
147                 ainfo->storage = ArgOnStack;
148                 (*stack_size) += is_double ? 8 : 4;
149     }
150     else {
151                 /* A double register */
152                 if (is_double)
153                         ainfo->storage = ArgInDoubleSSEReg;
154                 else
155                         ainfo->storage = ArgInFloatSSEReg;
156                 ainfo->reg = *gr;
157                 (*gr) += 1;
158     }
159 }
160
161
162 static void
163 add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
164                gboolean is_return,
165                guint32 *gr, guint32 *fr, guint32 *stack_size)
166 {
167         guint32 size;
168         MonoClass *klass;
169
170         klass = mono_class_from_mono_type (type);
171         if (sig->pinvoke) 
172                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
173         else 
174                 size = mono_type_stack_size (&klass->byval_arg, NULL);
175
176 #ifdef PLATFORM_WIN32
177         if (sig->pinvoke && is_return) {
178                 MonoMarshalType *info;
179
180                 /*
181                  * the exact rules are not very well documented, the code below seems to work with the 
182                  * code generated by gcc 3.3.3 -mno-cygwin.
183                  */
184                 info = mono_marshal_load_type_info (klass);
185                 g_assert (info);
186
187                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
188
189                 /* Special case structs with only a float member */
190                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
191                         ainfo->storage = ArgValuetypeInReg;
192                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
193                         return;
194                 }
195                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
196                         ainfo->storage = ArgValuetypeInReg;
197                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
198                         return;
199                 }               
200                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
201                         ainfo->storage = ArgValuetypeInReg;
202                         ainfo->pair_storage [0] = ArgInIReg;
203                         ainfo->pair_regs [0] = return_regs [0];
204                         if (info->native_size > 4) {
205                                 ainfo->pair_storage [1] = ArgInIReg;
206                                 ainfo->pair_regs [1] = return_regs [1];
207                         }
208                         return;
209                 }
210         }
211 #endif
212
213         ainfo->offset = *stack_size;
214         ainfo->storage = ArgOnStack;
215         *stack_size += ALIGN_TO (size, sizeof (gpointer));
216 }
217
218 /*
219  * get_call_info:
220  *
221  *  Obtain information about a call according to the calling convention.
222  * For x86 ELF, see the "System V Application Binary Interface Intel386 
223  * Architecture Processor Supplment, Fourth Edition" document for more
224  * information.
225  * For x86 win32, see ???.
226  */
227 static CallInfo*
228 get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
229 {
230         guint32 i, gr, fr;
231         MonoType *ret_type;
232         int n = sig->hasthis + sig->param_count;
233         guint32 stack_size = 0;
234         CallInfo *cinfo;
235
236         cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
237
238         gr = 0;
239         fr = 0;
240
241         /* return value */
242         {
243                 ret_type = mono_type_get_underlying_type (sig->ret);
244                 switch (ret_type->type) {
245                 case MONO_TYPE_BOOLEAN:
246                 case MONO_TYPE_I1:
247                 case MONO_TYPE_U1:
248                 case MONO_TYPE_I2:
249                 case MONO_TYPE_U2:
250                 case MONO_TYPE_CHAR:
251                 case MONO_TYPE_I4:
252                 case MONO_TYPE_U4:
253                 case MONO_TYPE_I:
254                 case MONO_TYPE_U:
255                 case MONO_TYPE_PTR:
256                 case MONO_TYPE_FNPTR:
257                 case MONO_TYPE_CLASS:
258                 case MONO_TYPE_OBJECT:
259                 case MONO_TYPE_SZARRAY:
260                 case MONO_TYPE_ARRAY:
261                 case MONO_TYPE_STRING:
262                         cinfo->ret.storage = ArgInIReg;
263                         cinfo->ret.reg = X86_EAX;
264                         break;
265                 case MONO_TYPE_U8:
266                 case MONO_TYPE_I8:
267                         cinfo->ret.storage = ArgInIReg;
268                         cinfo->ret.reg = X86_EAX;
269                         break;
270                 case MONO_TYPE_R4:
271                         cinfo->ret.storage = ArgOnFloatFpStack;
272                         break;
273                 case MONO_TYPE_R8:
274                         cinfo->ret.storage = ArgOnDoubleFpStack;
275                         break;
276                 case MONO_TYPE_GENERICINST:
277                         if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
278                                 cinfo->ret.storage = ArgInIReg;
279                                 cinfo->ret.reg = X86_EAX;
280                                 break;
281                         }
282                         /* Fall through */
283                 case MONO_TYPE_VALUETYPE: {
284                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
285
286                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
287                         if (cinfo->ret.storage == ArgOnStack)
288                                 /* The caller passes the address where the value is stored */
289                                 add_general (&gr, &stack_size, &cinfo->ret);
290                         break;
291                 }
292                 case MONO_TYPE_TYPEDBYREF:
293                         /* Same as a valuetype with size 24 */
294                         add_general (&gr, &stack_size, &cinfo->ret);
295                         ;
296                         break;
297                 case MONO_TYPE_VOID:
298                         cinfo->ret.storage = ArgNone;
299                         break;
300                 default:
301                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
302                 }
303         }
304
305         /* this */
306         if (sig->hasthis)
307                 add_general (&gr, &stack_size, cinfo->args + 0);
308
309         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
310                 gr = PARAM_REGS;
311                 fr = FLOAT_PARAM_REGS;
312                 
313                 /* Emit the signature cookie just before the implicit arguments */
314                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
315         }
316
317         for (i = 0; i < sig->param_count; ++i) {
318                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
319                 MonoType *ptype;
320
321                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
322                         /* We allways pass the sig cookie on the stack for simplicity */
323                         /* 
324                          * Prevent implicit arguments + the sig cookie from being passed 
325                          * in registers.
326                          */
327                         gr = PARAM_REGS;
328                         fr = FLOAT_PARAM_REGS;
329
330                         /* Emit the signature cookie just before the implicit arguments */
331                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
332                 }
333
334                 if (sig->params [i]->byref) {
335                         add_general (&gr, &stack_size, ainfo);
336                         continue;
337                 }
338                 ptype = mono_type_get_underlying_type (sig->params [i]);
339                 switch (ptype->type) {
340                 case MONO_TYPE_BOOLEAN:
341                 case MONO_TYPE_I1:
342                 case MONO_TYPE_U1:
343                         add_general (&gr, &stack_size, ainfo);
344                         break;
345                 case MONO_TYPE_I2:
346                 case MONO_TYPE_U2:
347                 case MONO_TYPE_CHAR:
348                         add_general (&gr, &stack_size, ainfo);
349                         break;
350                 case MONO_TYPE_I4:
351                 case MONO_TYPE_U4:
352                         add_general (&gr, &stack_size, ainfo);
353                         break;
354                 case MONO_TYPE_I:
355                 case MONO_TYPE_U:
356                 case MONO_TYPE_PTR:
357                 case MONO_TYPE_FNPTR:
358                 case MONO_TYPE_CLASS:
359                 case MONO_TYPE_OBJECT:
360                 case MONO_TYPE_STRING:
361                 case MONO_TYPE_SZARRAY:
362                 case MONO_TYPE_ARRAY:
363                         add_general (&gr, &stack_size, ainfo);
364                         break;
365                 case MONO_TYPE_GENERICINST:
366                         if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
367                                 add_general (&gr, &stack_size, ainfo);
368                                 break;
369                         }
370                         /* Fall through */
371                 case MONO_TYPE_VALUETYPE:
372                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
373                         break;
374                 case MONO_TYPE_TYPEDBYREF:
375                         stack_size += sizeof (MonoTypedRef);
376                         ainfo->storage = ArgOnStack;
377                         break;
378                 case MONO_TYPE_U8:
379                 case MONO_TYPE_I8:
380                         add_general_pair (&gr, &stack_size, ainfo);
381                         break;
382                 case MONO_TYPE_R4:
383                         add_float (&fr, &stack_size, ainfo, FALSE);
384                         break;
385                 case MONO_TYPE_R8:
386                         add_float (&fr, &stack_size, ainfo, TRUE);
387                         break;
388                 default:
389                         g_error ("unexpected type 0x%x", ptype->type);
390                         g_assert_not_reached ();
391                 }
392         }
393
394         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
395                 gr = PARAM_REGS;
396                 fr = FLOAT_PARAM_REGS;
397                 
398                 /* Emit the signature cookie just before the implicit arguments */
399                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
400         }
401
402 #if defined(__APPLE__)
403         if ((stack_size % 16) != 0) { 
404                 cinfo->need_stack_align = TRUE;
405                 stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
406         }
407 #endif
408
409         cinfo->stack_usage = stack_size;
410         cinfo->reg_usage = gr;
411         cinfo->freg_usage = fr;
412         return cinfo;
413 }
414
415 /*
416  * mono_arch_get_argument_info:
417  * @csig:  a method signature
418  * @param_count: the number of parameters to consider
419  * @arg_info: an array to store the result infos
420  *
421  * Gathers information on parameters such as size, alignment and
422  * padding. arg_info should be large enought to hold param_count + 1 entries. 
423  *
424  * Returns the size of the activation frame.
425  */
426 int
427 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
428 {
429         int k, frame_size = 0;
430         int size, pad;
431         guint32 align;
432         int offset = 8;
433         CallInfo *cinfo;
434
435         cinfo = get_call_info (csig, FALSE);
436
437         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
438                 frame_size += sizeof (gpointer);
439                 offset += 4;
440         }
441
442         arg_info [0].offset = offset;
443
444         if (csig->hasthis) {
445                 frame_size += sizeof (gpointer);
446                 offset += 4;
447         }
448
449         arg_info [0].size = frame_size;
450
451         for (k = 0; k < param_count; k++) {
452                 
453                 if (csig->pinvoke)
454                         size = mono_type_native_stack_size (csig->params [k], &align);
455                 else {
456                         int ialign;
457                         size = mono_type_stack_size (csig->params [k], &ialign);
458                         align = ialign;
459                 }
460
461                 /* ignore alignment for now */
462                 align = 1;
463
464                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
465                 arg_info [k].pad = pad;
466                 frame_size += size;
467                 arg_info [k + 1].pad = 0;
468                 arg_info [k + 1].size = size;
469                 offset += pad;
470                 arg_info [k + 1].offset = offset;
471                 offset += size;
472         }
473
474         align = MONO_ARCH_FRAME_ALIGNMENT;
475         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
476         arg_info [k].pad = pad;
477
478         g_free (cinfo);
479
480         return frame_size;
481 }
482
483 static const guchar cpuid_impl [] = {
484         0x55,                           /* push   %ebp */
485         0x89, 0xe5,                     /* mov    %esp,%ebp */
486         0x53,                           /* push   %ebx */
487         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
488         0x0f, 0xa2,                     /* cpuid   */
489         0x50,                           /* push   %eax */
490         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
491         0x89, 0x18,                     /* mov    %ebx,(%eax) */
492         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
493         0x89, 0x08,                     /* mov    %ecx,(%eax) */
494         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
495         0x89, 0x10,                     /* mov    %edx,(%eax) */
496         0x58,                           /* pop    %eax */
497         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
498         0x89, 0x02,                     /* mov    %eax,(%edx) */
499         0x5b,                           /* pop    %ebx */
500         0xc9,                           /* leave   */
501         0xc3,                           /* ret     */
502 };
503
504 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
505
506 static int 
507 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
508 {
509         int have_cpuid = 0;
510 #ifndef _MSC_VER
511         __asm__  __volatile__ (
512                 "pushfl\n"
513                 "popl %%eax\n"
514                 "movl %%eax, %%edx\n"
515                 "xorl $0x200000, %%eax\n"
516                 "pushl %%eax\n"
517                 "popfl\n"
518                 "pushfl\n"
519                 "popl %%eax\n"
520                 "xorl %%edx, %%eax\n"
521                 "andl $0x200000, %%eax\n"
522                 "movl %%eax, %0"
523                 : "=r" (have_cpuid)
524                 :
525                 : "%eax", "%edx"
526         );
527 #else
528         __asm {
529                 pushfd
530                 pop eax
531                 mov edx, eax
532                 xor eax, 0x200000
533                 push eax
534                 popfd
535                 pushfd
536                 pop eax
537                 xor eax, edx
538                 and eax, 0x200000
539                 mov have_cpuid, eax
540         }
541 #endif
542         if (have_cpuid) {
543                 /* Have to use the code manager to get around WinXP DEP */
544                 MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
545                 CpuidFunc func;
546                 void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
547                 memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
548
549                 func = (CpuidFunc)ptr;
550                 func (id, p_eax, p_ebx, p_ecx, p_edx);
551
552                 mono_code_manager_destroy (codeman);
553
554                 /*
555                  * We use this approach because of issues with gcc and pic code, see:
556                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
557                 __asm__ __volatile__ ("cpuid"
558                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
559                         : "a" (id));
560                 */
561                 return 1;
562         }
563         return 0;
564 }
565
566 /*
567  * Initialize the cpu to execute managed code.
568  */
569 void
570 mono_arch_cpu_init (void)
571 {
572         /* spec compliance requires running with double precision */
573 #ifndef _MSC_VER
574         guint16 fpcw;
575
576         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
577         fpcw &= ~X86_FPCW_PRECC_MASK;
578         fpcw |= X86_FPCW_PREC_DOUBLE;
579         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
580         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
581 #else
582         _control87 (_PC_53, MCW_PC);
583 #endif
584 }
585
586 /*
587  * This function returns the optimizations supported on this cpu.
588  */
589 guint32
590 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
591 {
592         int eax, ebx, ecx, edx;
593         guint32 opts = 0;
594         
595         *exclude_mask = 0;
596         /* Feature Flags function, flags returned in EDX. */
597         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
598                 if (edx & (1 << 15)) {
599                         opts |= MONO_OPT_CMOV;
600                         if (edx & 1)
601                                 opts |= MONO_OPT_FCMOV;
602                         else
603                                 *exclude_mask |= MONO_OPT_FCMOV;
604                 } else
605                         *exclude_mask |= MONO_OPT_CMOV;
606         }
607         return opts;
608 }
609
610 /*
611  * Determine whenever the trap whose info is in SIGINFO is caused by
612  * integer overflow.
613  */
614 gboolean
615 mono_arch_is_int_overflow (void *sigctx, void *info)
616 {
617         MonoContext ctx;
618         guint8* ip;
619
620         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
621
622         ip = (guint8*)ctx.eip;
623
624         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
625                 gint32 reg;
626
627                 /* idiv REG */
628                 switch (x86_modrm_rm (ip [1])) {
629                 case X86_EAX:
630                         reg = ctx.eax;
631                         break;
632                 case X86_ECX:
633                         reg = ctx.ecx;
634                         break;
635                 case X86_EDX:
636                         reg = ctx.edx;
637                         break;
638                 case X86_EBX:
639                         reg = ctx.ebx;
640                         break;
641                 case X86_ESI:
642                         reg = ctx.esi;
643                         break;
644                 case X86_EDI:
645                         reg = ctx.edi;
646                         break;
647                 default:
648                         g_assert_not_reached ();
649                         reg = -1;
650                 }
651
652                 if (reg == -1)
653                         return TRUE;
654         }
655                         
656         return FALSE;
657 }
658
659 static gboolean
660 is_regsize_var (MonoType *t) {
661         if (t->byref)
662                 return TRUE;
663         switch (mono_type_get_underlying_type (t)->type) {
664         case MONO_TYPE_I4:
665         case MONO_TYPE_U4:
666         case MONO_TYPE_I:
667         case MONO_TYPE_U:
668         case MONO_TYPE_PTR:
669         case MONO_TYPE_FNPTR:
670                 return TRUE;
671         case MONO_TYPE_OBJECT:
672         case MONO_TYPE_STRING:
673         case MONO_TYPE_CLASS:
674         case MONO_TYPE_SZARRAY:
675         case MONO_TYPE_ARRAY:
676                 return TRUE;
677         case MONO_TYPE_GENERICINST:
678                 if (!mono_type_generic_inst_is_valuetype (t))
679                         return TRUE;
680                 return FALSE;
681         case MONO_TYPE_VALUETYPE:
682                 return FALSE;
683         }
684         return FALSE;
685 }
686
687 GList *
688 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
689 {
690         GList *vars = NULL;
691         int i;
692
693         for (i = 0; i < cfg->num_varinfo; i++) {
694                 MonoInst *ins = cfg->varinfo [i];
695                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
696
697                 /* unused vars */
698                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
699                         continue;
700
701                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
702                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
703                         continue;
704
705                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
706                  * 8bit quantities in caller saved registers on x86 */
707                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
708                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
709                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
710                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
711                         g_assert (i == vmv->idx);
712                         vars = g_list_prepend (vars, vmv);
713                 }
714         }
715
716         vars = mono_varlist_sort (cfg, vars, 0);
717
718         return vars;
719 }
720
721 GList *
722 mono_arch_get_global_int_regs (MonoCompile *cfg)
723 {
724         GList *regs = NULL;
725
726         /* we can use 3 registers for global allocation */
727         regs = g_list_prepend (regs, (gpointer)X86_EBX);
728         regs = g_list_prepend (regs, (gpointer)X86_ESI);
729         regs = g_list_prepend (regs, (gpointer)X86_EDI);
730
731         return regs;
732 }
733
734 /*
735  * mono_arch_regalloc_cost:
736  *
737  *  Return the cost, in number of memory references, of the action of 
738  * allocating the variable VMV into a register during global register
739  * allocation.
740  */
741 guint32
742 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
743 {
744         MonoInst *ins = cfg->varinfo [vmv->idx];
745
746         if (cfg->method->save_lmf)
747                 /* The register is already saved */
748                 return (ins->opcode == OP_ARG) ? 1 : 0;
749         else
750                 /* push+pop+possible load if it is an argument */
751                 return (ins->opcode == OP_ARG) ? 3 : 2;
752 }
753  
754 /*
755  * Set var information according to the calling convention. X86 version.
756  * The locals var stuff should most likely be split in another method.
757  */
758 void
759 mono_arch_allocate_vars (MonoCompile *cfg)
760 {
761         MonoMethodSignature *sig;
762         MonoMethodHeader *header;
763         MonoInst *inst;
764         guint32 locals_stack_size, locals_stack_align;
765         int i, offset;
766         gint32 *offsets;
767         CallInfo *cinfo;
768
769         header = mono_method_get_header (cfg->method);
770         sig = mono_method_signature (cfg->method);
771
772         cinfo = get_call_info (sig, FALSE);
773
774         cfg->frame_reg = MONO_ARCH_BASEREG;
775         offset = 0;
776
777         /* Reserve space to save LMF and caller saved registers */
778
779         if (cfg->method->save_lmf) {
780                 offset += sizeof (MonoLMF);
781         } else {
782                 if (cfg->used_int_regs & (1 << X86_EBX)) {
783                         offset += 4;
784                 }
785
786                 if (cfg->used_int_regs & (1 << X86_EDI)) {
787                         offset += 4;
788                 }
789
790                 if (cfg->used_int_regs & (1 << X86_ESI)) {
791                         offset += 4;
792                 }
793         }
794
795         switch (cinfo->ret.storage) {
796         case ArgValuetypeInReg:
797                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
798                 offset += 8;
799                 cfg->ret->opcode = OP_REGOFFSET;
800                 cfg->ret->inst_basereg = X86_EBP;
801                 cfg->ret->inst_offset = - offset;
802                 break;
803         default:
804                 break;
805         }
806
807         /* Allocate locals */
808         offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
809         if (locals_stack_align) {
810                 offset += (locals_stack_align - 1);
811                 offset &= ~(locals_stack_align - 1);
812         }
813         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
814                 if (offsets [i] != -1) {
815                         MonoInst *inst = cfg->varinfo [i];
816                         inst->opcode = OP_REGOFFSET;
817                         inst->inst_basereg = X86_EBP;
818                         inst->inst_offset = - (offset + offsets [i]);
819                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
820                 }
821         }
822         g_free (offsets);
823         offset += locals_stack_size;
824
825
826         /*
827          * Allocate arguments+return value
828          */
829
830         switch (cinfo->ret.storage) {
831         case ArgOnStack:
832                 cfg->ret->opcode = OP_REGOFFSET;
833                 cfg->ret->inst_basereg = X86_EBP;
834                 cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
835                 break;
836         case ArgValuetypeInReg:
837                 break;
838         case ArgInIReg:
839                 cfg->ret->opcode = OP_REGVAR;
840                 cfg->ret->inst_c0 = cinfo->ret.reg;
841                 break;
842         case ArgNone:
843         case ArgOnFloatFpStack:
844         case ArgOnDoubleFpStack:
845                 break;
846         default:
847                 g_assert_not_reached ();
848         }
849
850         if (sig->call_convention == MONO_CALL_VARARG) {
851                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
852                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
853         }
854
855         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
856                 ArgInfo *ainfo = &cinfo->args [i];
857                 inst = cfg->varinfo [i];
858                 if (inst->opcode != OP_REGVAR) {
859                         inst->opcode = OP_REGOFFSET;
860                         inst->inst_basereg = X86_EBP;
861                 }
862                 inst->inst_offset = ainfo->offset + ARGS_OFFSET;
863         }
864
865         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
866         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
867
868         cfg->stack_offset = offset;
869
870         g_free (cinfo);
871 }
872
873 void
874 mono_arch_create_vars (MonoCompile *cfg)
875 {
876         MonoMethodSignature *sig;
877         CallInfo *cinfo;
878
879         sig = mono_method_signature (cfg->method);
880
881         cinfo = get_call_info (sig, FALSE);
882
883         if (cinfo->ret.storage == ArgValuetypeInReg)
884                 cfg->ret_var_is_local = TRUE;
885
886         g_free (cinfo);
887 }
888
889 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
890  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
891  */
892
893 /* 
894  * take the arguments and generate the arch-specific
895  * instructions to properly call the function in call.
896  * This includes pushing, moving arguments to the right register
897  * etc.
898  */
899 MonoCallInst*
900 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
901         MonoInst *arg, *in;
902         MonoMethodSignature *sig;
903         int i, n;
904         CallInfo *cinfo;
905         int sentinelpos;
906
907         sig = call->signature;
908         n = sig->param_count + sig->hasthis;
909
910         cinfo = get_call_info (sig, FALSE);
911
912         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
913                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
914
915         for (i = 0; i < n; ++i) {
916                 ArgInfo *ainfo = cinfo->args + i;
917
918                 /* Emit the signature cookie just before the implicit arguments */
919                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
920                         MonoMethodSignature *tmp_sig;
921                         MonoInst *sig_arg;
922
923                         /* FIXME: Add support for signature tokens to AOT */
924                         cfg->disable_aot = TRUE;
925                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
926
927                         /*
928                          * mono_ArgIterator_Setup assumes the signature cookie is 
929                          * passed first and all the arguments which were before it are
930                          * passed on the stack after the signature. So compensate by 
931                          * passing a different signature.
932                          */
933                         tmp_sig = mono_metadata_signature_dup (call->signature);
934                         tmp_sig->param_count -= call->signature->sentinelpos;
935                         tmp_sig->sentinelpos = 0;
936                         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
937
938                         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
939                         sig_arg->inst_p0 = tmp_sig;
940
941                         arg->inst_left = sig_arg;
942                         arg->type = STACK_PTR;
943                         /* prepend, so they get reversed */
944                         arg->next = call->out_args;
945                         call->out_args = arg;
946                 }
947
948                 if (is_virtual && i == 0) {
949                         /* the argument will be attached to the call instrucion */
950                         in = call->args [i];
951                 } else {
952                         MonoType *t;
953
954                         if (i >= sig->hasthis)
955                                 t = sig->params [i - sig->hasthis];
956                         else
957                                 t = &mono_defaults.int_class->byval_arg;
958                         t = mono_type_get_underlying_type (t);
959
960                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
961                         in = call->args [i];
962                         arg->cil_code = in->cil_code;
963                         arg->inst_left = in;
964                         arg->type = in->type;
965                         /* prepend, so they get reversed */
966                         arg->next = call->out_args;
967                         call->out_args = arg;
968
969                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
970                                 guint32 size, align;
971
972                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
973                                         size = sizeof (MonoTypedRef);
974                                         align = sizeof (gpointer);
975                                 }
976                                 else
977                                         if (sig->pinvoke)
978                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
979                                         else {
980                                                 int ialign;
981                                                 size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
982                                                 align = ialign;
983                                         }
984                                 arg->opcode = OP_OUTARG_VT;
985                                 arg->klass = in->klass;
986                                 arg->unused = sig->pinvoke;
987                                 arg->inst_imm = size; 
988                         }
989                         else {
990                                 switch (ainfo->storage) {
991                                 case ArgOnStack:
992                                         arg->opcode = OP_OUTARG;
993                                         if (!t->byref) {
994                                                 if (t->type == MONO_TYPE_R4)
995                                                         arg->opcode = OP_OUTARG_R4;
996                                                 else
997                                                         if (t->type == MONO_TYPE_R8)
998                                                                 arg->opcode = OP_OUTARG_R8;
999                                         }
1000                                         break;
1001                                 default:
1002                                         g_assert_not_reached ();
1003                                 }
1004                         }
1005                 }
1006         }
1007
1008         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1009                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1010                         MonoInst *zero_inst;
1011                         /*
1012                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1013                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1014                          * before calling the function. So we add a dummy instruction to represent pushing the 
1015                          * struct return address to the stack. The return address will be saved to this stack slot 
1016                          * by the code emitted in this_vret_args.
1017                          */
1018                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1019                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1020                         zero_inst->inst_p0 = 0;
1021                         arg->inst_left = zero_inst;
1022                         arg->type = STACK_PTR;
1023                         /* prepend, so they get reversed */
1024                         arg->next = call->out_args;
1025                         call->out_args = arg;
1026                 }
1027                 else
1028                         /* if the function returns a struct, the called method already does a ret $0x4 */
1029                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1030                                 cinfo->stack_usage -= 4;
1031         }
1032         
1033         call->stack_usage = cinfo->stack_usage;
1034
1035 #if defined(__APPLE__)
1036         if (cinfo->need_stack_align) {
1037                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1038                 arg->inst_c0 = cinfo->stack_align_amount;
1039                 arg->next = call->out_args;
1040                 call->out_args = arg;
1041         }
1042 #endif 
1043
1044         g_free (cinfo);
1045
1046         return call;
1047 }
1048
1049 /*
1050  * Allow tracing to work with this interface (with an optional argument)
1051  */
1052 void*
1053 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1054 {
1055         guchar *code = p;
1056
1057         /* if some args are passed in registers, we need to save them here */
1058         x86_push_reg (code, X86_EBP);
1059
1060         if (cfg->compile_aot) {
1061                 x86_push_imm (code, cfg->method);
1062                 x86_mov_reg_imm (code, X86_EAX, func);
1063                 x86_call_reg (code, X86_EAX);
1064         } else {
1065                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1066                 x86_push_imm (code, cfg->method);
1067                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1068                 x86_call_code (code, 0);
1069         }
1070         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1071
1072         return code;
1073 }
1074
/*
 * Ways in which mono_arch_instrument_epilog () preserves the return value of
 * the instrumented method around the call it emits.
 */
enum {
	SAVE_NONE    = 0,	/* nothing is saved */
	SAVE_STRUCT  = 1,	/* valuetype return, no register is saved */
	SAVE_EAX     = 2,	/* EAX is pushed and popped */
	SAVE_EAX_EDX = 3,	/* EDX and EAX are pushed and popped */
	SAVE_FP      = 4	/* the FP stack top is stored to and reloaded from the stack */
};
1082
1083 void*
1084 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1085 {
1086         guchar *code = p;
1087         int arg_size = 0, save_mode = SAVE_NONE;
1088         MonoMethod *method = cfg->method;
1089         
1090         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1091         case MONO_TYPE_VOID:
1092                 /* special case string .ctor icall */
1093                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1094                         save_mode = SAVE_EAX;
1095                 else
1096                         save_mode = SAVE_NONE;
1097                 break;
1098         case MONO_TYPE_I8:
1099         case MONO_TYPE_U8:
1100                 save_mode = SAVE_EAX_EDX;
1101                 break;
1102         case MONO_TYPE_R4:
1103         case MONO_TYPE_R8:
1104                 save_mode = SAVE_FP;
1105                 break;
1106         case MONO_TYPE_GENERICINST:
1107                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1108                         save_mode = SAVE_EAX;
1109                         break;
1110                 }
1111                 /* Fall through */
1112         case MONO_TYPE_VALUETYPE:
1113                 save_mode = SAVE_STRUCT;
1114                 break;
1115         default:
1116                 save_mode = SAVE_EAX;
1117                 break;
1118         }
1119
1120         switch (save_mode) {
1121         case SAVE_EAX_EDX:
1122                 x86_push_reg (code, X86_EDX);
1123                 x86_push_reg (code, X86_EAX);
1124                 if (enable_arguments) {
1125                         x86_push_reg (code, X86_EDX);
1126                         x86_push_reg (code, X86_EAX);
1127                         arg_size = 8;
1128                 }
1129                 break;
1130         case SAVE_EAX:
1131                 x86_push_reg (code, X86_EAX);
1132                 if (enable_arguments) {
1133                         x86_push_reg (code, X86_EAX);
1134                         arg_size = 4;
1135                 }
1136                 break;
1137         case SAVE_FP:
1138                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1139                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1140                 if (enable_arguments) {
1141                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1142                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1143                         arg_size = 8;
1144                 }
1145                 break;
1146         case SAVE_STRUCT:
1147                 if (enable_arguments) {
1148                         x86_push_membase (code, X86_EBP, 8);
1149                         arg_size = 4;
1150                 }
1151                 break;
1152         case SAVE_NONE:
1153         default:
1154                 break;
1155         }
1156
1157         if (cfg->compile_aot) {
1158                 x86_push_imm (code, method);
1159                 x86_mov_reg_imm (code, X86_EAX, func);
1160                 x86_call_reg (code, X86_EAX);
1161         } else {
1162                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1163                 x86_push_imm (code, method);
1164                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1165                 x86_call_code (code, 0);
1166         }
1167         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1168
1169         switch (save_mode) {
1170         case SAVE_EAX_EDX:
1171                 x86_pop_reg (code, X86_EAX);
1172                 x86_pop_reg (code, X86_EDX);
1173                 break;
1174         case SAVE_EAX:
1175                 x86_pop_reg (code, X86_EAX);
1176                 break;
1177         case SAVE_FP:
1178                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1179                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1180                 break;
1181         case SAVE_NONE:
1182         default:
1183                 break;
1184         }
1185
1186         return code;
1187 }
1188
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch for INS using condition COND / signedness SIGN.
 * Relies on the variables `code', `cfg' and `cpos' of the expanding function.
 *
 * The target is a label (ins->inst_i0) when MONO_INST_BRLABEL is set in
 * ins->flags, and the basic block ins->inst_true_bb otherwise. If the native
 * offset of the target is already non-zero, a direct branch to it is emitted.
 * Otherwise a patch is recorded and a branch with a zero displacement is
 * emitted: an 8 bit one when MONO_OPT_BRANCH is enabled and the estimated
 * distance fits in an imm8, a 32 bit one in all other cases.
 *
 * INS is parenthesized in the expansion so that any pointer expression can be
 * passed. The macro still expands to a bare if/else statement, as before.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if ((ins)->flags & MONO_INST_BRLABEL) { \
	if ((ins)->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, (ins)->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 ((ins)->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if ((ins)->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, (ins)->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 ((ins)->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}
1213
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit a conditional branch which is taken when COND holds and leads to the
 * throwing of the system exception EXC_NAME. If
 * mono_branch_optimize_exception_target () returns a target instruction, the
 * branch goes directly to it through EMIT_COND_BRANCH. Otherwise a
 * MONO_PATCH_INFO_EXC patch is recorded and a 32 bit branch is emitted.
 * Relies on the variables `cfg', `bb' and `code' of the expanding function.
 *
 * Note that the expansion ends with its own `;'.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,is_signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins != NULL) { \
			EMIT_COND_BRANCH (tins, cond, is_signed); \
		} else { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, is_signed); \
		} \
	} while (0); 
1229
/*
 * EMIT_FPCOMPARE:
 *
 *   Emit an fcompp followed by an fnstsw into the code buffer BUF.
 * Note that the expansion ends with its own `;'.
 */
#define EMIT_FPCOMPARE(buf) \
	do { \
		x86_fcompp (buf); \
		x86_fnstsw (buf); \
	} while (0); 
1234
1235
1236 static guint8*
1237 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1238 {
1239         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1240         x86_call_code (code, 0);
1241
1242         return code;
1243 }
1244
/*
 * INST_IGNORES_CFLAGS:
 *
 *   True if INS is one of the opcodes listed here. peephole_pass () uses it to
 * decide whether clobbering the condition flags before INS is acceptable.
 * FIXME: Add more instructions
 */
#define INST_IGNORES_CFLAGS(ins) \
	(((ins)->opcode == CEE_BR) || \
	 ((ins)->opcode == OP_STORE_MEMBASE_IMM) || \
	 ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1247
1248 static void
1249 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1250 {
1251         MonoInst *ins, *last_ins = NULL;
1252         ins = bb->code;
1253
1254         while (ins) {
1255
1256                 switch (ins->opcode) {
1257                 case OP_ICONST:
1258                         /* reg = 0 -> XOR (reg, reg) */
1259                         /* XOR sets cflags on x86, so we cant do it always */
1260                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1261                                 ins->opcode = CEE_XOR;
1262                                 ins->sreg1 = ins->dreg;
1263                                 ins->sreg2 = ins->dreg;
1264                         }
1265                         break;
1266                 case OP_MUL_IMM: 
1267                         /* remove unnecessary multiplication with 1 */
1268                         if (ins->inst_imm == 1) {
1269                                 if (ins->dreg != ins->sreg1) {
1270                                         ins->opcode = OP_MOVE;
1271                                 } else {
1272                                         last_ins->next = ins->next;
1273                                         ins = ins->next;
1274                                         continue;
1275                                 }
1276                         }
1277                         break;
1278                 case OP_COMPARE_IMM:
1279                         /* OP_COMPARE_IMM (reg, 0) 
1280                          * --> 
1281                          * OP_X86_TEST_NULL (reg) 
1282                          */
1283                         if (!ins->inst_imm)
1284                                 ins->opcode = OP_X86_TEST_NULL;
1285                         break;
1286                 case OP_X86_COMPARE_MEMBASE_IMM:
1287                         /* 
1288                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1289                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1290                          * -->
1291                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1292                          * OP_COMPARE_IMM reg, imm
1293                          *
1294                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1295                          */
1296                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1297                             ins->inst_basereg == last_ins->inst_destbasereg &&
1298                             ins->inst_offset == last_ins->inst_offset) {
1299                                         ins->opcode = OP_COMPARE_IMM;
1300                                         ins->sreg1 = last_ins->sreg1;
1301
1302                                         /* check if we can remove cmp reg,0 with test null */
1303                                         if (!ins->inst_imm)
1304                                                 ins->opcode = OP_X86_TEST_NULL;
1305                                 }
1306
1307                         break;
1308                 case OP_LOAD_MEMBASE:
1309                 case OP_LOADI4_MEMBASE:
1310                         /* 
1311                          * Note: if reg1 = reg2 the load op is removed
1312                          *
1313                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1314                          * OP_LOAD_MEMBASE offset(basereg), reg2
1315                          * -->
1316                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1317                          * OP_MOVE reg1, reg2
1318                          */
1319                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1320                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1321                             ins->inst_basereg == last_ins->inst_destbasereg &&
1322                             ins->inst_offset == last_ins->inst_offset) {
1323                                 if (ins->dreg == last_ins->sreg1) {
1324                                         last_ins->next = ins->next;                             
1325                                         ins = ins->next;                                
1326                                         continue;
1327                                 } else {
1328                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1329                                         ins->opcode = OP_MOVE;
1330                                         ins->sreg1 = last_ins->sreg1;
1331                                 }
1332
1333                         /* 
1334                          * Note: reg1 must be different from the basereg in the second load
1335                          * Note: if reg1 = reg2 is equal then second load is removed
1336                          *
1337                          * OP_LOAD_MEMBASE offset(basereg), reg1
1338                          * OP_LOAD_MEMBASE offset(basereg), reg2
1339                          * -->
1340                          * OP_LOAD_MEMBASE offset(basereg), reg1
1341                          * OP_MOVE reg1, reg2
1342                          */
1343                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1344                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1345                               ins->inst_basereg != last_ins->dreg &&
1346                               ins->inst_basereg == last_ins->inst_basereg &&
1347                               ins->inst_offset == last_ins->inst_offset) {
1348
1349                                 if (ins->dreg == last_ins->dreg) {
1350                                         last_ins->next = ins->next;                             
1351                                         ins = ins->next;                                
1352                                         continue;
1353                                 } else {
1354                                         ins->opcode = OP_MOVE;
1355                                         ins->sreg1 = last_ins->dreg;
1356                                 }
1357
1358                                 //g_assert_not_reached ();
1359
1360 #if 0
1361                         /* 
1362                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1363                          * OP_LOAD_MEMBASE offset(basereg), reg
1364                          * -->
1365                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1366                          * OP_ICONST reg, imm
1367                          */
1368                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1369                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1370                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1371                                    ins->inst_offset == last_ins->inst_offset) {
1372                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1373                                 ins->opcode = OP_ICONST;
1374                                 ins->inst_c0 = last_ins->inst_imm;
1375                                 g_assert_not_reached (); // check this rule
1376 #endif
1377                         }
1378                         break;
1379                 case OP_LOADU1_MEMBASE:
1380                 case OP_LOADI1_MEMBASE:
1381                         /* 
1382                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1383                          * OP_LOAD_MEMBASE offset(basereg), reg2
1384                          * -->
1385                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1386                          * CONV_I2/U2 reg1, reg2
1387                          */
1388                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1389                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1390                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1391                                         ins->inst_offset == last_ins->inst_offset) {
1392                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1393                                 ins->sreg1 = last_ins->sreg1;
1394                         }
1395                         break;
1396                 case OP_LOADU2_MEMBASE:
1397                 case OP_LOADI2_MEMBASE:
1398                         /* 
1399                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1400                          * OP_LOAD_MEMBASE offset(basereg), reg2
1401                          * -->
1402                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1403                          * CONV_I2/U2 reg1, reg2
1404                          */
1405                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1406                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1407                                         ins->inst_offset == last_ins->inst_offset) {
1408                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1409                                 ins->sreg1 = last_ins->sreg1;
1410                         }
1411                         break;
1412                 case CEE_CONV_I4:
1413                 case CEE_CONV_U4:
1414                 case OP_MOVE:
1415                         /*
1416                          * Removes:
1417                          *
1418                          * OP_MOVE reg, reg 
1419                          */
1420                         if (ins->dreg == ins->sreg1) {
1421                                 if (last_ins)
1422                                         last_ins->next = ins->next;                             
1423                                 ins = ins->next;
1424                                 continue;
1425                         }
1426                         /* 
1427                          * Removes:
1428                          *
1429                          * OP_MOVE sreg, dreg 
1430                          * OP_MOVE dreg, sreg
1431                          */
1432                         if (last_ins && last_ins->opcode == OP_MOVE &&
1433                             ins->sreg1 == last_ins->dreg &&
1434                             ins->dreg == last_ins->sreg1) {
1435                                 last_ins->next = ins->next;                             
1436                                 ins = ins->next;                                
1437                                 continue;
1438                         }
1439                         break;
1440                         
1441                 case OP_X86_PUSH_MEMBASE:
1442                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1443                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1444                             ins->inst_basereg == last_ins->inst_destbasereg &&
1445                             ins->inst_offset == last_ins->inst_offset) {
1446                                     ins->opcode = OP_X86_PUSH;
1447                                     ins->sreg1 = last_ins->sreg1;
1448                         }
1449                         break;
1450                 }
1451                 last_ins = ins;
1452                 ins = ins->next;
1453         }
1454         bb->last_ins = last_ins;
1455 }
1456
1457 static const int 
1458 branch_cc_table [] = {
1459         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1460         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1461         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1462 };
1463
1464 static const char*const * ins_spec = pentium_desc;
1465
1466 /*#include "cprop.c"*/
1467 void
1468 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1469 {
1470         mono_local_regalloc (cfg, bb);
1471 }
1472
1473 static unsigned char*
1474 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1475 {
1476         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1477         x86_fnstcw_membase(code, X86_ESP, 0);
1478         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1479         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1480         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1481         x86_fldcw_membase (code, X86_ESP, 2);
1482         if (size == 8) {
1483                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1484                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1485                 x86_pop_reg (code, dreg);
1486                 /* FIXME: need the high register 
1487                  * x86_pop_reg (code, dreg_high);
1488                  */
1489         } else {
1490                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1491                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1492                 x86_pop_reg (code, dreg);
1493         }
1494         x86_fldcw_membase (code, X86_ESP, 0);
1495         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1496
1497         if (size == 1)
1498                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1499         else if (size == 2)
1500                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1501         return code;
1502 }
1503
/*
 * mono_emit_stack_alloc:
 * @code: buffer to store code to
 * @tree: instruction being emitted; tree->sreg1 is the register holding the
 *        requested size and tree->dreg / tree->flags are consulted below
 *
 * Emits code that lowers ESP by the value held in tree->sreg1. When compiled
 * with PLATFORM_WIN32 or MONO_ARCH_SIGSEGV_ON_ALTSTACK, requests of 0x1000
 * bytes or more are carved out one 0x1000-byte page at a time, reading from
 * each new page right after it is allocated. If MONO_INST_INIT is set in
 * tree->flags, the new area is zero-filled with "rep stosl".
 *
 * The emitted code modifies sreg: the probe loop subtracts 0x1000 from it per
 * page, and the zeroing code shifts it right by 2.
 * NOTE(review): the zeroing code clears sreg >> 2 dwords, so it presumably
 * relies on the caller passing a size that is a multiple of 4 - confirm.
 *
 * Returns: a pointer to the end of the stored code
 */
1504 static unsigned char*
1505 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1506 {
1507         int sreg = tree->sreg1;
1508         int need_touch = FALSE;
1509
1510 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1511         need_touch = TRUE;
1512 #endif
1513
1514         if (need_touch) {
1515                 guint8* br[5]; /* positions of the branches emitted below, patched once their targets are known */
1516
1517                 /*
1518                  * Under Windows:
1519                  * If requested stack size is larger than one page,
1520                  * perform stack-touch operation
1521                  */
1522                 /*
1523                  * Generate stack probe code.
1524                  * Under Windows, it is necessary to allocate one page at a time,
1525                  * "touching" stack after each successful sub-allocation. This is
1526                  * because of the way stack growth is implemented - there is a
1527                  * guard page before the lowest stack page that is currently committed.
1528                  * Stack normally grows sequentially so OS traps access to the
1529                  * guard page and commits more pages when needed.
1530                  */
1531                 x86_test_reg_imm (code, sreg, ~0xFFF); /* zero result <=> size is below 0x1000 */
1532                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE); /* small request: skip the probe loop */
1533
1534                 br[2] = code; /* loop */
1535                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000); /* allocate one page */
1536                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP); /* touch the new page; only the memory access matters */
1537
1538                 /* 
1539                  * By the end of the loop, sreg is smaller than 0x1000, so the init routine
1540                  * that follows only initializes the last part of the area.
1541                  */
1542                 /* Same as the init code below with size==0x1000 */
1543                 if (tree->flags & MONO_INST_INIT) {
1544                         x86_push_reg (code, X86_EAX);
1545                         x86_push_reg (code, X86_ECX);
1546                         x86_push_reg (code, X86_EDI);
1547                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2)); /* dword count for one page */
1548                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX); /* fill value: zero */
1549                         x86_lea_membase (code, X86_EDI, X86_ESP, 12); /* start of the page, above the three registers just pushed */
1550                         x86_cld (code);
1551                         x86_prefix (code, X86_REP_PREFIX);
1552                         x86_stosl (code);
1553                         x86_pop_reg (code, X86_EDI);
1554                         x86_pop_reg (code, X86_ECX);
1555                         x86_pop_reg (code, X86_EAX);
1556                 }
1557
1558                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000); /* one page has been handed out */
1559                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1560                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE); /* loop while at least a full page remains */
1561                 x86_patch (br[3], br[2]);
1562                 x86_test_reg_reg (code, sreg, sreg);
1563                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE); /* nothing left over: done */
1564                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg); /* allocate the remainder (less than a page) */
1565
1566                 br[1] = code; x86_jump8 (code, 0); /* jump over the small-request path */
1567
1568                 x86_patch (br[0], code); /* small-request path starts here */
1569                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1570                 x86_patch (br[1], code); /* both exits of the probe loop land here */
1571                 x86_patch (br[4], code);
1572         }
1573         else
1574                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1575
1576         if (tree->flags & MONO_INST_INIT) {
1577                 int offset = 0; /* bytes pushed below; used to locate the allocated area relative to ESP */
1578                 if (tree->dreg != X86_EAX && sreg != X86_EAX) { /* EAX/ECX/EDI are saved only when they are neither dreg nor sreg */
1579                         x86_push_reg (code, X86_EAX);
1580                         offset += 4;
1581                 }
1582                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1583                         x86_push_reg (code, X86_ECX);
1584                         offset += 4;
1585                 }
1586                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1587                         x86_push_reg (code, X86_EDI);
1588                         offset += 4;
1589                 }
1590                 
1591                 x86_shift_reg_imm (code, X86_SHR, sreg, 2); /* byte count -> dword count (modifies sreg) */
1592                 if (sreg != X86_ECX)
1593                         x86_mov_reg_reg (code, X86_ECX, sreg, 4); /* rep count goes in ECX */
1594                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX); /* fill value: zero */
1595                                 
1596                 x86_lea_membase (code, X86_EDI, X86_ESP, offset); /* start of the allocated area, above the saved registers */
1597                 x86_cld (code);
1598                 x86_prefix (code, X86_REP_PREFIX);
1599                 x86_stosl (code);
1600                 
1601                 if (tree->dreg != X86_EDI && sreg != X86_EDI) /* restore in reverse push order, under the same conditions */
1602                         x86_pop_reg (code, X86_EDI);
1603                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1604                         x86_pop_reg (code, X86_ECX);
1605                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1606                         x86_pop_reg (code, X86_EAX);
1607         }
1608         return code;
1609 }
1610
1611
/*
 * emit_move_return_value:
 * @cfg: current compilation (not referenced in this function)
 * @ins: the call instruction whose return value is to be moved
 * @code: buffer to store code to
 *
 * For CEE_CALL, OP_CALL_REG and OP_CALL_MEMBASE, emits a move from EAX to
 * ins->dreg when dreg is not already EAX.
 * For OP_VCALL, OP_VCALL_REG and OP_VCALL_MEMBASE, looks up the call info of
 * the call's signature; if the return storage is ArgValuetypeInReg, emits a
 * pop of the destination address into ECX followed by a store of each
 * register-held part to [ECX + quad * sizeof (gpointer)].
 * Any other opcode emits nothing.
 *
 * Returns: a pointer to the end of the stored code
 */
1612 static guint8*
1613 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1614 {
1615         CallInfo *cinfo;
1616         int quad;
1617
1618         /* Move return value to the target register */
1619         switch (ins->opcode) {
1620         case CEE_CALL:
1621         case OP_CALL_REG:
1622         case OP_CALL_MEMBASE:
1623                 if (ins->dreg != X86_EAX)
1624                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1625                 break;
1626         case OP_VCALL:
1627         case OP_VCALL_REG:
1628         case OP_VCALL_MEMBASE:
1629                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE); /* released with g_free below */
1630                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1631                         /* Pop the destination address from the stack */
1632                         x86_pop_reg (code, X86_ECX);
1633                         
1634                         for (quad = 0; quad < 2; quad ++) { /* the value is described as up to two parts */
1635                                 switch (cinfo->ret.pair_storage [quad]) {
1636                                 case ArgInIReg:
1637                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX); /* ECX holds the destination address */
1638                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1639                                         break;
1640                                 case ArgNone: /* this part is not present: nothing to store */
1641                                         break;
1642                                 default:
1643                                         g_assert_not_reached ();
1644                                 }
1645                         }
1646                 }
1647                 g_free (cinfo); /* no break: falls through to default, which only breaks */
1648         default:
1649                 break;
1650         }
1651
1652         return code;
1653 }
1654
1655 /*
1656  * emit_tls_get:
1657  * @code: buffer to store code to
1658  * @dreg: hard register where to place the result
1659  * @tls_offset: on PLATFORM_WIN32 a slot index (scaled by 4 below), otherwise a byte offset
1660  *
1661  * emit_tls_get emits in @code the native code that puts in the dreg register
1662  * the item in the thread local storage identified by tls_offset.
 *
 * Three sequences are possible:
 *  - PLATFORM_WIN32: load fs:[0x18] into dreg, then read the 4-byte slot at
 *    [dreg + 3600 + tls_offset * 4]; tls_offset must be below 64.
 *  - optimize_for_xen: load gs:[0] into dreg, then read [dreg + tls_offset].
 *  - otherwise: read gs:[tls_offset] directly.
1663  *
1664  * Returns: a pointer to the end of the stored code
1665  */
1666 static guint8*
1667 emit_tls_get (guint8* code, int dreg, int tls_offset)
1668 {
1669 #ifdef PLATFORM_WIN32
1670         /* 
1671          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1672          * Journal and/or a disassembly of the TlsGet () function.
1673          */
1674         g_assert (tls_offset < 64); /* this sequence only handles indices below 64 */
1675         x86_prefix (code, X86_FS_PREFIX);
1676         x86_mov_reg_mem (code, dreg, 0x18, 4); /* dreg = fs:[0x18] */
1677         /* Dunno what this does but TlsGetValue () contains it */
        /* NOTE(review): this ANDs the dword at [dreg + 0x34] with 0, i.e. zeroes it; presumably the thread's last-error field - confirm */
1678         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
1679         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4); /* 4-byte slots starting at offset 3600 */
1680 #else
1681         if (optimize_for_xen) {
1682                 x86_prefix (code, X86_GS_PREFIX);
1683                 x86_mov_reg_mem (code, dreg, 0, 4); /* dreg = gs:[0] */
1684                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4); /* then an ordinary load relative to that pointer */
1685         } else {
1686                 x86_prefix (code, X86_GS_PREFIX);
1687                 x86_mov_reg_mem (code, dreg, tls_offset, 4); /* single gs-relative load */
1688         }
1689 #endif
1690         return code;
1691 }
1692
/*
 * REAL_PRINT_REG:
 * @text: string literal; it is concatenated with " %d %p\n" to form the
 *        printf format
 * @reg: hard register number to print
 *
 * Debugging aid: emits code that calls printf with the format above, the
 * register number (%d) and the register's current value (%p). The three
 * arguments are pushed in reverse order and removed with a single
 * "add esp, 3*4" after the call. EAX, EDX and ECX are pushed before and
 * popped after the call.
 *
 * Expects a variable named "code" in scope at the point of use.
 * NOTE(review): mono_assert does not take "code", so it presumably runs when
 * the macro body executes rather than in the emitted code - confirm.
 * NOTE(review): the expansion is several statements without a do/while
 * wrapper, so it is not safe as the body of an unbraced if/else.
 */
1693 #define REAL_PRINT_REG(text,reg) \
1694 mono_assert (reg >= 0); \
1695 x86_push_reg (code, X86_EAX); \
1696 x86_push_reg (code, X86_EDX); \
1697 x86_push_reg (code, X86_ECX); \
1698 x86_push_reg (code, reg); \
1699 x86_push_imm (code, reg); \
1700 x86_push_imm (code, text " %d %p\n"); \
1701 x86_mov_reg_imm (code, X86_EAX, printf); \
1702 x86_call_reg (code, X86_EAX); \
1703 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
1704 x86_pop_reg (code, X86_ECX); \
1705 x86_pop_reg (code, X86_EDX); \
1706 x86_pop_reg (code, X86_EAX);
1707
1708 /* benchmark and set based on cpu */
/*
 * Byte boundary to which the start of a loop block is padded in
 * mono_arch_output_basic_block. It is used there as a mask (align - 1),
 * so it has to be a power of two.
 */
1709 #define LOOP_ALIGNMENT 8
/* Non-zero when bb has both loop_body_start and nesting set. */
1710 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
1711
1712 void
1713 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1714 {
1715         MonoInst *ins;
1716         MonoCallInst *call;
1717         guint offset;
1718         guint8 *code = cfg->native_code + cfg->code_len;
1719         MonoInst *last_ins = NULL;
1720         guint last_offset = 0;
1721         int max_len, cpos;
1722
1723         if (cfg->opt & MONO_OPT_PEEPHOLE)
1724                 peephole_pass (cfg, bb);
1725
1726         if (cfg->opt & MONO_OPT_LOOP) {
1727                 int pad, align = LOOP_ALIGNMENT;
1728                 /* set alignment depending on cpu */
1729                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
1730                         pad = align - pad;
1731                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
1732                         x86_padding (code, pad);
1733                         cfg->code_len += pad;
1734                         bb->native_offset = cfg->code_len;
1735                 }
1736         }
1737
1738         if (cfg->verbose_level > 2)
1739                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1740
1741         cpos = bb->max_offset;
1742
1743         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1744                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1745                 g_assert (!cfg->compile_aot);
1746                 cpos += 6;
1747
1748                 cov->data [bb->dfn].cil_code = bb->cil_code;
1749                 /* this is not thread safe, but good enough */
1750                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1751         }
1752
1753         offset = code - cfg->native_code;
1754
1755         mono_debug_open_block (cfg, bb, offset);
1756
1757         ins = bb->code;
1758         while (ins) {
1759                 offset = code - cfg->native_code;
1760
1761                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
1762
1763                 if (offset > (cfg->code_size - max_len - 16)) {
1764                         cfg->code_size *= 2;
1765                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1766                         code = cfg->native_code + offset;
1767                         mono_jit_stats.code_reallocs++;
1768                 }
1769
1770                 mono_debug_record_line_number (cfg, ins, offset);
1771
1772                 switch (ins->opcode) {
1773                 case OP_BIGMUL:
1774                         x86_mul_reg (code, ins->sreg2, TRUE);
1775                         break;
1776                 case OP_BIGMUL_UN:
1777                         x86_mul_reg (code, ins->sreg2, FALSE);
1778                         break;
1779                 case OP_X86_SETEQ_MEMBASE:
1780                 case OP_X86_SETNE_MEMBASE:
1781                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
1782                                          ins->inst_basereg, ins->inst_offset, TRUE);
1783                         break;
1784                 case OP_STOREI1_MEMBASE_IMM:
1785                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1786                         break;
1787                 case OP_STOREI2_MEMBASE_IMM:
1788                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1789                         break;
1790                 case OP_STORE_MEMBASE_IMM:
1791                 case OP_STOREI4_MEMBASE_IMM:
1792                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1793                         break;
1794                 case OP_STOREI1_MEMBASE_REG:
1795                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1796                         break;
1797                 case OP_STOREI2_MEMBASE_REG:
1798                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1799                         break;
1800                 case OP_STORE_MEMBASE_REG:
1801                 case OP_STOREI4_MEMBASE_REG:
1802                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1803                         break;
1804                 case CEE_LDIND_I:
1805                 case CEE_LDIND_I4:
1806                 case CEE_LDIND_U4:
1807                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1808                         break;
1809                 case OP_LOADU4_MEM:
1810                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1811                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1812                         break;
1813                 case OP_LOAD_MEMBASE:
1814                 case OP_LOADI4_MEMBASE:
1815                 case OP_LOADU4_MEMBASE:
1816                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1817                         break;
1818                 case OP_LOADU1_MEMBASE:
1819                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1820                         break;
1821                 case OP_LOADI1_MEMBASE:
1822                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1823                         break;
1824                 case OP_LOADU2_MEMBASE:
1825                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1826                         break;
1827                 case OP_LOADI2_MEMBASE:
1828                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1829                         break;
1830                 case CEE_CONV_I1:
1831                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1832                         break;
1833                 case CEE_CONV_I2:
1834                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1835                         break;
1836                 case CEE_CONV_U1:
1837                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1838                         break;
1839                 case CEE_CONV_U2:
1840                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1841                         break;
1842                 case OP_COMPARE:
1843                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1844                         break;
1845                 case OP_COMPARE_IMM:
1846                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1847                         break;
1848                 case OP_X86_COMPARE_MEMBASE_REG:
1849                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1850                         break;
1851                 case OP_X86_COMPARE_MEMBASE_IMM:
1852                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1853                         break;
1854                 case OP_X86_COMPARE_MEMBASE8_IMM:
1855                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1856                         break;
1857                 case OP_X86_COMPARE_REG_MEMBASE:
1858                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
1859                         break;
1860                 case OP_X86_COMPARE_MEM_IMM:
1861                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
1862                         break;
1863                 case OP_X86_TEST_NULL:
1864                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
1865                         break;
1866                 case OP_X86_ADD_MEMBASE_IMM:
1867                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1868                         break;
1869                 case OP_X86_ADD_MEMBASE:
1870                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
1871                         break;
1872                 case OP_X86_SUB_MEMBASE_IMM:
1873                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1874                         break;
1875                 case OP_X86_SUB_MEMBASE:
1876                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
1877                         break;
1878                 case OP_X86_AND_MEMBASE_IMM:
1879                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1880                         break;
1881                 case OP_X86_OR_MEMBASE_IMM:
1882                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1883                         break;
1884                 case OP_X86_XOR_MEMBASE_IMM:
1885                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1886                         break;
1887                 case OP_X86_INC_MEMBASE:
1888                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
1889                         break;
1890                 case OP_X86_INC_REG:
1891                         x86_inc_reg (code, ins->dreg);
1892                         break;
1893                 case OP_X86_DEC_MEMBASE:
1894                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
1895                         break;
1896                 case OP_X86_DEC_REG:
1897                         x86_dec_reg (code, ins->dreg);
1898                         break;
1899                 case OP_X86_MUL_MEMBASE:
1900                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
1901                         break;
1902                 case CEE_BREAK:
1903                         x86_breakpoint (code);
1904                         break;
1905                 case OP_ADDCC:
1906                 case CEE_ADD:
1907                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
1908                         break;
1909                 case OP_ADC:
1910                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
1911                         break;
1912                 case OP_ADDCC_IMM:
1913                 case OP_ADD_IMM:
1914                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
1915                         break;
1916                 case OP_ADC_IMM:
1917                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
1918                         break;
1919                 case OP_SUBCC:
1920                 case CEE_SUB:
1921                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
1922                         break;
1923                 case OP_SBB:
1924                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
1925                         break;
1926                 case OP_SUBCC_IMM:
1927                 case OP_SUB_IMM:
1928                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
1929                         break;
1930                 case OP_SBB_IMM:
1931                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
1932                         break;
1933                 case CEE_AND:
1934                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
1935                         break;
1936                 case OP_AND_IMM:
1937                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
1938                         break;
1939                 case CEE_DIV:
1940                         x86_cdq (code);
1941                         x86_div_reg (code, ins->sreg2, TRUE);
1942                         break;
1943                 case CEE_DIV_UN:
1944                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1945                         x86_div_reg (code, ins->sreg2, FALSE);
1946                         break;
1947                 case OP_DIV_IMM:
1948                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1949                         x86_cdq (code);
1950                         x86_div_reg (code, ins->sreg2, TRUE);
1951                         break;
1952                 case CEE_REM:
1953                         x86_cdq (code);
1954                         x86_div_reg (code, ins->sreg2, TRUE);
1955                         break;
1956                 case CEE_REM_UN:
1957                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1958                         x86_div_reg (code, ins->sreg2, FALSE);
1959                         break;
1960                 case OP_REM_IMM:
1961                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1962                         x86_cdq (code);
1963                         x86_div_reg (code, ins->sreg2, TRUE);
1964                         break;
1965                 case CEE_OR:
1966                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
1967                         break;
1968                 case OP_OR_IMM:
1969                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
1970                         break;
1971                 case CEE_XOR:
1972                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
1973                         break;
1974                 case OP_XOR_IMM:
1975                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
1976                         break;
1977                 case CEE_SHL:
1978                         g_assert (ins->sreg2 == X86_ECX);
1979                         x86_shift_reg (code, X86_SHL, ins->dreg);
1980                         break;
1981                 case CEE_SHR:
1982                         g_assert (ins->sreg2 == X86_ECX);
1983                         x86_shift_reg (code, X86_SAR, ins->dreg);
1984                         break;
1985                 case OP_SHR_IMM:
1986                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
1987                         break;
1988                 case OP_SHR_UN_IMM:
1989                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
1990                         break;
1991                 case CEE_SHR_UN:
1992                         g_assert (ins->sreg2 == X86_ECX);
1993                         x86_shift_reg (code, X86_SHR, ins->dreg);
1994                         break;
1995                 case OP_SHL_IMM:
1996                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
1997                         break;
1998                 case OP_LSHL: {
1999                         guint8 *jump_to_end;
2000
2001                         /* handle shifts below 32 bits */
2002                         x86_shld_reg (code, ins->unused, ins->sreg1);
2003                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2004
2005                         x86_test_reg_imm (code, X86_ECX, 32);
2006                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2007
2008                         /* handle shift over 32 bit */
2009                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2010                         x86_clear_reg (code, ins->sreg1);
2011                         
2012                         x86_patch (jump_to_end, code);
2013                         }
2014                         break;
2015                 case OP_LSHR: {
2016                         guint8 *jump_to_end;
2017
2018                         /* handle shifts below 32 bits */
2019                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2020                         x86_shift_reg (code, X86_SAR, ins->unused);
2021
2022                         x86_test_reg_imm (code, X86_ECX, 32);
2023                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2024
2025                         /* handle shifts over 31 bits */
2026                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2027                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
2028                         
2029                         x86_patch (jump_to_end, code);
2030                         }
2031                         break;
2032                 case OP_LSHR_UN: {
2033                         guint8 *jump_to_end;
2034
2035                         /* handle shifts below 32 bits */
2036                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2037                         x86_shift_reg (code, X86_SHR, ins->unused);
2038
2039                         x86_test_reg_imm (code, X86_ECX, 32);
2040                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2041
2042                         /* handle shifts over 31 bits */
2043                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2044                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
2045                         
2046                         x86_patch (jump_to_end, code);
2047                         }
2048                         break;
2049                 case OP_LSHL_IMM:
2050                         if (ins->inst_imm >= 32) {
2051                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2052                                 x86_clear_reg (code, ins->sreg1);
2053                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
2054                         } else {
2055                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
2056                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2057                         }
2058                         break;
2059                 case OP_LSHR_IMM:
2060                         if (ins->inst_imm >= 32) {
2061                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
2062                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
2063                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2064                         } else {
2065                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2066                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
2067                         }
2068                         break;
2069                 case OP_LSHR_UN_IMM:
2070                         if (ins->inst_imm >= 32) {
2071                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2072                                 x86_clear_reg (code, ins->unused);
2073                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2074                         } else {
2075                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2076                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
2077                         }
2078                         break;
2079                 case CEE_NOT:
2080                         x86_not_reg (code, ins->sreg1);
2081                         break;
2082                 case CEE_NEG:
2083                         x86_neg_reg (code, ins->sreg1);
2084                         break;
2085                 case OP_SEXT_I1:
2086                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2087                         break;
2088                 case OP_SEXT_I2:
2089                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2090                         break;
2091                 case CEE_MUL:
2092                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2093                         break;
2094                 case OP_MUL_IMM:
2095                         switch (ins->inst_imm) {
2096                         case 2:
2097                                 /* MOV r1, r2 */
2098                                 /* ADD r1, r1 */
2099                                 if (ins->dreg != ins->sreg1)
2100                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2101                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2102                                 break;
2103                         case 3:
2104                                 /* LEA r1, [r2 + r2*2] */
2105                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2106                                 break;
2107                         case 5:
2108                                 /* LEA r1, [r2 + r2*4] */
2109                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2110                                 break;
2111                         case 6:
2112                                 /* LEA r1, [r2 + r2*2] */
2113                                 /* ADD r1, r1          */
2114                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2115                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2116                                 break;
2117                         case 9:
2118                                 /* LEA r1, [r2 + r2*8] */
2119                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2120                                 break;
2121                         case 10:
2122                                 /* LEA r1, [r2 + r2*4] */
2123                                 /* ADD r1, r1          */
2124                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2125                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2126                                 break;
2127                         case 12:
2128                                 /* LEA r1, [r2 + r2*2] */
2129                                 /* SHL r1, 2           */
2130                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2131                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2132                                 break;
2133                         case 25:
2134                                 /* LEA r1, [r2 + r2*4] */
2135                                 /* LEA r1, [r1 + r1*4] */
2136                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2137                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2138                                 break;
2139                         case 100:
2140                                 /* LEA r1, [r2 + r2*4] */
2141                                 /* SHL r1, 2           */
2142                                 /* LEA r1, [r1 + r1*4] */
2143                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2144                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2145                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2146                                 break;
2147                         default:
2148                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2149                                 break;
2150                         }
2151                         break;
2152                 case CEE_MUL_OVF:
2153                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2154                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2155                         break;
2156                 case CEE_MUL_OVF_UN: {
2157                         /* the mul operation and the exception check should most likely be split */
2158                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2159                         /*g_assert (ins->sreg2 == X86_EAX);
2160                         g_assert (ins->dreg == X86_EAX);*/
2161                         if (ins->sreg2 == X86_EAX) {
2162                                 non_eax_reg = ins->sreg1;
2163                         } else if (ins->sreg1 == X86_EAX) {
2164                                 non_eax_reg = ins->sreg2;
2165                         } else {
2166                                 /* no need to save since we're going to store to it anyway */
2167                                 if (ins->dreg != X86_EAX) {
2168                                         saved_eax = TRUE;
2169                                         x86_push_reg (code, X86_EAX);
2170                                 }
2171                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2172                                 non_eax_reg = ins->sreg2;
2173                         }
2174                         if (ins->dreg == X86_EDX) {
2175                                 if (!saved_eax) {
2176                                         saved_eax = TRUE;
2177                                         x86_push_reg (code, X86_EAX);
2178                                 }
2179                         } else if (ins->dreg != X86_EAX) {
2180                                 saved_edx = TRUE;
2181                                 x86_push_reg (code, X86_EDX);
2182                         }
2183                         x86_mul_reg (code, non_eax_reg, FALSE);
2184                         /* save before the check since pop and mov don't change the flags */
2185                         if (ins->dreg != X86_EAX)
2186                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2187                         if (saved_edx)
2188                                 x86_pop_reg (code, X86_EDX);
2189                         if (saved_eax)
2190                                 x86_pop_reg (code, X86_EAX);
2191                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2192                         break;
2193                 }
2194                 case OP_ICONST:
2195                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2196                         break;
2197                 case OP_AOTCONST:
2198                         g_assert_not_reached ();
2199                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2200                         x86_mov_reg_imm (code, ins->dreg, 0);
2201                         break;
2202                 case OP_LOAD_GOTADDR:
2203                         x86_call_imm (code, 0);
2204                         /* 
2205                          * The patch needs to point to the pop, since the GOT offset needs 
2206                          * to be added to that address.
2207                          */
2208                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2209                         x86_pop_reg (code, ins->dreg);
2210                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2211                         break;
2212                 case OP_GOT_ENTRY:
2213                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2214                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2215                         break;
2216                 case OP_X86_PUSH_GOT_ENTRY:
2217                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2218                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2219                         break;
2220                 case CEE_CONV_I4:
2221                 case OP_MOVE:
2222                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2223                         break;
2224                 case CEE_CONV_U4:
2225                         g_assert_not_reached ();
2226                 case CEE_JMP: {
2227                         /*
2228                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2229                          * Keep in sync with the code in emit_epilog.
2230                          */
2231                         int pos = 0;
2232
2233                         /* FIXME: no tracing support... */
2234                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2235                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2236                         /* reset offset to make max_len work */
2237                         offset = code - cfg->native_code;
2238
2239                         g_assert (!cfg->method->save_lmf);
2240
2241                         if (cfg->used_int_regs & (1 << X86_EBX))
2242                                 pos -= 4;
2243                         if (cfg->used_int_regs & (1 << X86_EDI))
2244                                 pos -= 4;
2245                         if (cfg->used_int_regs & (1 << X86_ESI))
2246                                 pos -= 4;
2247                         if (pos)
2248                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2249         
2250                         if (cfg->used_int_regs & (1 << X86_ESI))
2251                                 x86_pop_reg (code, X86_ESI);
2252                         if (cfg->used_int_regs & (1 << X86_EDI))
2253                                 x86_pop_reg (code, X86_EDI);
2254                         if (cfg->used_int_regs & (1 << X86_EBX))
2255                                 x86_pop_reg (code, X86_EBX);
2256         
2257                         /* restore ESP/EBP */
2258                         x86_leave (code);
2259                         offset = code - cfg->native_code;
2260                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2261                         x86_jump32 (code, 0);
2262                         break;
2263                 }
2264                 case OP_CHECK_THIS:
2265                         /* ensure ins->sreg1 is not NULL
2266                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2267                          * cmp DWORD PTR [eax], 0
2268                          */
2269                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2270                         break;
2271                 case OP_ARGLIST: {
2272                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2273                         x86_push_reg (code, hreg);
2274                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2275                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2276                         x86_pop_reg (code, hreg);
2277                         break;
2278                 }
2279                 case OP_FCALL:
2280                 case OP_LCALL:
2281                 case OP_VCALL:
2282                 case OP_VOIDCALL:
2283                 case CEE_CALL:
2284                         call = (MonoCallInst*)ins;
2285                         if (ins->flags & MONO_INST_HAS_METHOD)
2286                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2287                         else
2288                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2289                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2290                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2291                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2292                                  * for P4 or i686 because gcc will avoid using pop push at all). But we aren't
2293                                  * smart enough to do that optimization yet
2294                                  *
2295                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2296                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2297                                  * speedup (most likely from locality benefits). People with other processors should
2298                                  * check on theirs to see what happens.
2299                                  */
2300                                 if (call->stack_usage == 4) {
2301                                         /* we want to use registers that won't get used soon, so use
2302                                          * ecx, as eax will get allocated first. edx is used by long calls,
2303                                          * so we can't use that.
2304                                          */
2305                                         
2306                                         x86_pop_reg (code, X86_ECX);
2307                                 } else {
2308                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2309                                 }
2310                         }
2311                         code = emit_move_return_value (cfg, ins, code);
2312                         break;
2313                 case OP_FCALL_REG:
2314                 case OP_LCALL_REG:
2315                 case OP_VCALL_REG:
2316                 case OP_VOIDCALL_REG:
2317                 case OP_CALL_REG:
2318                         call = (MonoCallInst*)ins;
2319                         x86_call_reg (code, ins->sreg1);
2320                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2321                                 if (call->stack_usage == 4)
2322                                         x86_pop_reg (code, X86_ECX);
2323                                 else
2324                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2325                         }
2326                         code = emit_move_return_value (cfg, ins, code);
2327                         break;
2328                 case OP_FCALL_MEMBASE:
2329                 case OP_LCALL_MEMBASE:
2330                 case OP_VCALL_MEMBASE:
2331                 case OP_VOIDCALL_MEMBASE:
2332                 case OP_CALL_MEMBASE:
2333                         call = (MonoCallInst*)ins;
2334                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2335                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2336                                 if (call->stack_usage == 4)
2337                                         x86_pop_reg (code, X86_ECX);
2338                                 else
2339                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2340                         }
2341                         code = emit_move_return_value (cfg, ins, code);
2342                         break;
2343                 case OP_OUTARG:
2344                 case OP_X86_PUSH:
2345                         x86_push_reg (code, ins->sreg1);
2346                         break;
2347                 case OP_X86_PUSH_IMM:
2348                         x86_push_imm (code, ins->inst_imm);
2349                         break;
2350                 case OP_X86_PUSH_MEMBASE:
2351                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2352                         break;
2353                 case OP_X86_PUSH_OBJ: 
2354                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2355                         x86_push_reg (code, X86_EDI);
2356                         x86_push_reg (code, X86_ESI);
2357                         x86_push_reg (code, X86_ECX);
2358                         if (ins->inst_offset)
2359                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2360                         else
2361                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2362                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2363                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2364                         x86_cld (code);
2365                         x86_prefix (code, X86_REP_PREFIX);
2366                         x86_movsd (code);
2367                         x86_pop_reg (code, X86_ECX);
2368                         x86_pop_reg (code, X86_ESI);
2369                         x86_pop_reg (code, X86_EDI);
2370                         break;
2371                 case OP_X86_LEA:
2372                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2373                         break;
2374                 case OP_X86_LEA_MEMBASE:
2375                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2376                         break;
2377                 case OP_X86_XCHG:
2378                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2379                         break;
2380                 case OP_LOCALLOC:
2381                         /* keep alignment */
2382                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2383                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2384                         code = mono_emit_stack_alloc (code, ins);
2385                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2386                         break;
2387                 case CEE_RET:
2388                         x86_ret (code);
2389                         break;
2390                 case CEE_THROW: {
2391                         x86_push_reg (code, ins->sreg1);
2392                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2393                                                           (gpointer)"mono_arch_throw_exception");
2394                         break;
2395                 }
2396                 case OP_RETHROW: {
2397                         x86_push_reg (code, ins->sreg1);
2398                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2399                                                           (gpointer)"mono_arch_rethrow_exception");
2400                         break;
2401                 }
2402                 case OP_CALL_HANDLER: 
2403                         /* Align stack */
2404 #ifdef __APPLE__
2405                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2406 #endif
2407                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2408                         x86_call_imm (code, 0);
2409 #ifdef __APPLE__
2410                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2411 #endif
2412                         break;
2413                 case OP_LABEL:
2414                         ins->inst_c0 = code - cfg->native_code;
2415                         break;
2416                 case CEE_BR:
2417                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2418                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2419                         //break;
2420                         if (ins->flags & MONO_INST_BRLABEL) {
2421                                 if (ins->inst_i0->inst_c0) {
2422                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2423                                 } else {
2424                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2425                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2426                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2427                                                 x86_jump8 (code, 0);
2428                                         else 
2429                                                 x86_jump32 (code, 0);
2430                                 }
2431                         } else {
2432                                 if (ins->inst_target_bb->native_offset) {
2433                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2434                                 } else {
2435                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2436                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2437                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2438                                                 x86_jump8 (code, 0);
2439                                         else 
2440                                                 x86_jump32 (code, 0);
2441                                 } 
2442                         }
2443                         break;
2444                 case OP_BR_REG:
2445                         x86_jump_reg (code, ins->sreg1);
2446                         break;
2447                 case OP_CEQ:
2448                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2449                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2450                         break;
2451                 case OP_CLT:
2452                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2453                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2454                         break;
2455                 case OP_CLT_UN:
2456                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2457                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2458                         break;
2459                 case OP_CGT:
2460                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2461                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2462                         break;
2463                 case OP_CGT_UN:
2464                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2465                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2466                         break;
2467                 case OP_CNE:
2468                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2469                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2470                         break;
2471                 case OP_COND_EXC_EQ:
2472                 case OP_COND_EXC_NE_UN:
2473                 case OP_COND_EXC_LT:
2474                 case OP_COND_EXC_LT_UN:
2475                 case OP_COND_EXC_GT:
2476                 case OP_COND_EXC_GT_UN:
2477                 case OP_COND_EXC_GE:
2478                 case OP_COND_EXC_GE_UN:
2479                 case OP_COND_EXC_LE:
2480                 case OP_COND_EXC_LE_UN:
2481                 case OP_COND_EXC_OV:
2482                 case OP_COND_EXC_NO:
2483                 case OP_COND_EXC_C:
2484                 case OP_COND_EXC_NC:
2485                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2486                         break;
2487                 case CEE_BEQ:
2488                 case CEE_BNE_UN:
2489                 case CEE_BLT:
2490                 case CEE_BLT_UN:
2491                 case CEE_BGT:
2492                 case CEE_BGT_UN:
2493                 case CEE_BGE:
2494                 case CEE_BGE_UN:
2495                 case CEE_BLE:
2496                 case CEE_BLE_UN:
2497                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2498                         break;
2499
2500                 /* floating point opcodes */
2501                 case OP_R8CONST: {
2502                         double d = *(double *)ins->inst_p0;
2503
2504                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2505                                 x86_fldz (code);
2506                         } else if (d == 1.0) {
2507                                 x86_fld1 (code);
2508                         } else {
2509                                 if (cfg->compile_aot) {
2510                                         guint32 *val = (guint32*)&d;
2511                                         x86_push_imm (code, val [1]);
2512                                         x86_push_imm (code, val [0]);
2513                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2514                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2515                                 }
2516                                 else {
2517                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2518                                         x86_fld (code, NULL, TRUE);
2519                                 }
2520                         }
2521                         break;
2522                 }
2523                 case OP_R4CONST: {
2524                         float f = *(float *)ins->inst_p0;
2525
2526                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2527                                 x86_fldz (code);
2528                         } else if (f == 1.0) {
2529                                 x86_fld1 (code);
2530                         } else {
2531                                 if (cfg->compile_aot) {
2532                                         guint32 val = *(guint32*)&f;
2533                                         x86_push_imm (code, val);
2534                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2535                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2536                                 }
2537                                 else {
2538                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2539                                         x86_fld (code, NULL, FALSE);
2540                                 }
2541                         }
2542                         break;
2543                 }
2544                 case OP_STORER8_MEMBASE_REG:
2545                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2546                         break;
2547                 case OP_LOADR8_SPILL_MEMBASE:
2548                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2549                         x86_fxch (code, 1);
2550                         break;
2551                 case OP_LOADR8_MEMBASE:
2552                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2553                         break;
2554                 case OP_STORER4_MEMBASE_REG:
2555                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2556                         break;
2557                 case OP_LOADR4_MEMBASE:
2558                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2559                         break;
2560                 case CEE_CONV_R4: /* FIXME: change precision */
2561                 case CEE_CONV_R8:
2562                         x86_push_reg (code, ins->sreg1);
2563                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2564                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2565                         break;
2566                 case OP_X86_FP_LOAD_I8:
2567                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2568                         break;
2569                 case OP_X86_FP_LOAD_I4:
2570                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2571                         break;
2572                 case OP_FCONV_TO_I1:
2573                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2574                         break;
2575                 case OP_FCONV_TO_U1:
2576                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2577                         break;
2578                 case OP_FCONV_TO_I2:
2579                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2580                         break;
2581                 case OP_FCONV_TO_U2:
2582                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2583                         break;
2584                 case OP_FCONV_TO_I4:
2585                 case OP_FCONV_TO_I:
2586                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2587                         break;
2588                 case OP_FCONV_TO_I8:
2589                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2590                         x86_fnstcw_membase(code, X86_ESP, 0);
2591                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2592                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2593                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2594                         x86_fldcw_membase (code, X86_ESP, 2);
2595                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2596                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2597                         x86_pop_reg (code, ins->dreg);
2598                         x86_pop_reg (code, ins->unused);
2599                         x86_fldcw_membase (code, X86_ESP, 0);
2600                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2601                         break;
2602                 case OP_LCONV_TO_R_UN: { 
2603                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2604                         guint8 *br;
2605
2606                         /* load 64bit integer to FP stack */
2607                         x86_push_imm (code, 0);
2608                         x86_push_reg (code, ins->sreg2);
2609                         x86_push_reg (code, ins->sreg1);
2610                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2611                         /* store as 80bit FP value */
2612                         x86_fst80_membase (code, X86_ESP, 0);
2613                         
2614                         /* test if lreg is negative */
2615                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2616                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2617         
2618                         /* add correction constant mn */
2619                         x86_fld80_mem (code, mn);
2620                         x86_fld80_membase (code, X86_ESP, 0);
2621                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2622                         x86_fst80_membase (code, X86_ESP, 0);
2623
2624                         x86_patch (br, code);
2625
2626                         x86_fld80_membase (code, X86_ESP, 0);
2627                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2628
2629                         break;
2630                 }
2631                 case OP_LCONV_TO_OVF_I: {
2632                         guint8 *br [3], *label [1];
2633                         MonoInst *tins;
2634
2635                         /* 
2636                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2637                          */
2638                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2639
2640                         /* If the low word top bit is set, see if we are negative */
2641                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2642                         /* We are not negative (no top bit set), check for our top word to be zero */
2643                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2644                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2645                         label [0] = code;
2646
2647                         /* throw exception */
2648                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2649                         if (tins) {
2650                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2651                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2652                                         x86_jump8 (code, 0);
2653                                 else
2654                                         x86_jump32 (code, 0);
2655                         } else {
2656                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2657                                 x86_jump32 (code, 0);
2658                         }
2659         
2660         
2661                         x86_patch (br [0], code);
2662                         /* our top bit is set, check that top word is 0xffffffff */
2663                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2664                 
2665                         x86_patch (br [1], code);
2666                         /* nope, emit exception */
2667                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2668                         x86_patch (br [2], label [0]);
2669
2670                         if (ins->dreg != ins->sreg1)
2671                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2672                         break;
2673                 }
2674                 case OP_FADD:
2675                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2676                         break;
2677                 case OP_FSUB:
2678                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2679                         break;          
2680                 case OP_FMUL:
2681                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2682                         break;          
2683                 case OP_FDIV:
2684                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2685                         break;          
2686                 case OP_FNEG:
2687                         x86_fchs (code);
2688                         break;          
2689                 case OP_SIN:
2690                         x86_fsin (code);
2691                         x86_fldz (code);
2692                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2693                         break;          
2694                 case OP_COS:
2695                         x86_fcos (code);
2696                         x86_fldz (code);
2697                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2698                         break;          
2699                 case OP_ABS:
2700                         x86_fabs (code);
2701                         break;          
2702                 case OP_TAN: {
2703                         /* 
2704                          * it really doesn't make sense to inline all this code,
2705                          * it's here just to show that things may not be as simple 
2706                          * as they appear.
2707                          */
2708                         guchar *check_pos, *end_tan, *pop_jump;
2709                         x86_push_reg (code, X86_EAX);
2710                         x86_fptan (code);
2711                         x86_fnstsw (code);
2712                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2713                         check_pos = code;
2714                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2715                         x86_fstp (code, 0); /* pop the 1.0 */
2716                         end_tan = code;
2717                         x86_jump8 (code, 0);
2718                         x86_fldpi (code);
2719                         x86_fp_op (code, X86_FADD, 0);
2720                         x86_fxch (code, 1);
2721                         x86_fprem1 (code);
2722                         x86_fstsw (code);
2723                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2724                         pop_jump = code;
2725                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2726                         x86_fstp (code, 1);
2727                         x86_fptan (code);
2728                         x86_patch (pop_jump, code);
2729                         x86_fstp (code, 0); /* pop the 1.0 */
2730                         x86_patch (check_pos, code);
2731                         x86_patch (end_tan, code);
2732                         x86_fldz (code);
2733                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2734                         x86_pop_reg (code, X86_EAX);
2735                         break;
2736                 }
2737                 case OP_ATAN:
2738                         x86_fld1 (code);
2739                         x86_fpatan (code);
2740                         x86_fldz (code);
2741                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2742                         break;          
2743                 case OP_SQRT:
2744                         x86_fsqrt (code);
2745                         break;          
2746                 case OP_X86_FPOP:
2747                         x86_fstp (code, 0);
2748                         break;          
2749                 case OP_FREM: {
2750                         guint8 *l1, *l2;
2751
2752                         x86_push_reg (code, X86_EAX);
2753                         /* we need to exchange ST(0) with ST(1) */
2754                         x86_fxch (code, 1);
2755
2756                         /* this requires a loop, because fprem somtimes 
2757                          * returns a partial remainder */
2758                         l1 = code;
2759                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2760                         /* x86_fprem1 (code); */
2761                         x86_fprem (code);
2762                         x86_fnstsw (code);
2763                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2764                         l2 = code + 2;
2765                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2766
2767                         /* pop result */
2768                         x86_fstp (code, 1);
2769
2770                         x86_pop_reg (code, X86_EAX);
2771                         break;
2772                 }
2773                 case OP_FCOMPARE:
2774                         if (cfg->opt & MONO_OPT_FCMOV) {
2775                                 x86_fcomip (code, 1);
2776                                 x86_fstp (code, 0);
2777                                 break;
2778                         }
2779                         /* this overwrites EAX */
2780                         EMIT_FPCOMPARE(code);
2781                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2782                         break;
2783                 case OP_FCEQ:
2784                         if (cfg->opt & MONO_OPT_FCMOV) {
2785                                 /* zeroing the register at the start results in 
2786                                  * shorter and faster code (we can also remove the widening op)
2787                                  */
2788                                 guchar *unordered_check;
2789                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2790                                 x86_fcomip (code, 1);
2791                                 x86_fstp (code, 0);
2792                                 unordered_check = code;
2793                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2794                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2795                                 x86_patch (unordered_check, code);
2796                                 break;
2797                         }
2798                         if (ins->dreg != X86_EAX) 
2799                                 x86_push_reg (code, X86_EAX);
2800
2801                         EMIT_FPCOMPARE(code);
2802                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2803                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2804                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2805                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2806
2807                         if (ins->dreg != X86_EAX) 
2808                                 x86_pop_reg (code, X86_EAX);
2809                         break;
2810                 case OP_FCLT:
2811                 case OP_FCLT_UN:
2812                         if (cfg->opt & MONO_OPT_FCMOV) {
2813                                 /* zeroing the register at the start results in 
2814                                  * shorter and faster code (we can also remove the widening op)
2815                                  */
2816                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2817                                 x86_fcomip (code, 1);
2818                                 x86_fstp (code, 0);
2819                                 if (ins->opcode == OP_FCLT_UN) {
2820                                         guchar *unordered_check = code;
2821                                         guchar *jump_to_end;
2822                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2823                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2824                                         jump_to_end = code;
2825                                         x86_jump8 (code, 0);
2826                                         x86_patch (unordered_check, code);
2827                                         x86_inc_reg (code, ins->dreg);
2828                                         x86_patch (jump_to_end, code);
2829                                 } else {
2830                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2831                                 }
2832                                 break;
2833                         }
2834                         if (ins->dreg != X86_EAX) 
2835                                 x86_push_reg (code, X86_EAX);
2836
2837                         EMIT_FPCOMPARE(code);
2838                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2839                         if (ins->opcode == OP_FCLT_UN) {
2840                                 guchar *is_not_zero_check, *end_jump;
2841                                 is_not_zero_check = code;
2842                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2843                                 end_jump = code;
2844                                 x86_jump8 (code, 0);
2845                                 x86_patch (is_not_zero_check, code);
2846                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2847
2848                                 x86_patch (end_jump, code);
2849                         }
2850                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2851                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2852
2853                         if (ins->dreg != X86_EAX) 
2854                                 x86_pop_reg (code, X86_EAX);
2855                         break;
2856                 case OP_FCGT:
2857                 case OP_FCGT_UN:
2858                         if (cfg->opt & MONO_OPT_FCMOV) {
2859                                 /* zeroing the register at the start results in 
2860                                  * shorter and faster code (we can also remove the widening op)
2861                                  */
2862                                 guchar *unordered_check;
2863                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2864                                 x86_fcomip (code, 1);
2865                                 x86_fstp (code, 0);
2866                                 if (ins->opcode == OP_FCGT) {
2867                                         unordered_check = code;
2868                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2869                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2870                                         x86_patch (unordered_check, code);
2871                                 } else {
2872                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2873                                 }
2874                                 break;
2875                         }
2876                         if (ins->dreg != X86_EAX) 
2877                                 x86_push_reg (code, X86_EAX);
2878
2879                         EMIT_FPCOMPARE(code);
2880                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2881                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2882                         if (ins->opcode == OP_FCGT_UN) {
2883                                 guchar *is_not_zero_check, *end_jump;
2884                                 is_not_zero_check = code;
2885                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2886                                 end_jump = code;
2887                                 x86_jump8 (code, 0);
2888                                 x86_patch (is_not_zero_check, code);
2889                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2890         
2891                                 x86_patch (end_jump, code);
2892                         }
2893                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2894                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2895
2896                         if (ins->dreg != X86_EAX) 
2897                                 x86_pop_reg (code, X86_EAX);
2898                         break;
2899                 case OP_FBEQ:
2900                         if (cfg->opt & MONO_OPT_FCMOV) {
2901                                 guchar *jump = code;
2902                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2903                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2904                                 x86_patch (jump, code);
2905                                 break;
2906                         }
2907                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2908                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2909                         break;
2910                 case OP_FBNE_UN:
2911                         /* Branch if C013 != 100 */
2912                         if (cfg->opt & MONO_OPT_FCMOV) {
2913                                 /* branch if !ZF or (PF|CF) */
2914                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2915                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2916                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2917                                 break;
2918                         }
2919                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2920                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2921                         break;
2922                 case OP_FBLT:
2923                         if (cfg->opt & MONO_OPT_FCMOV) {
2924                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2925                                 break;
2926                         }
2927                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2928                         break;
2929                 case OP_FBLT_UN:
2930                         if (cfg->opt & MONO_OPT_FCMOV) {
2931                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2932                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2933                                 break;
2934                         }
2935                         if (ins->opcode == OP_FBLT_UN) {
2936                                 guchar *is_not_zero_check, *end_jump;
2937                                 is_not_zero_check = code;
2938                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2939                                 end_jump = code;
2940                                 x86_jump8 (code, 0);
2941                                 x86_patch (is_not_zero_check, code);
2942                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2943
2944                                 x86_patch (end_jump, code);
2945                         }
2946                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2947                         break;
2948                 case OP_FBGT:
2949                 case OP_FBGT_UN:
2950                         if (cfg->opt & MONO_OPT_FCMOV) {
2951                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2952                                 break;
2953                         }
2954                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2955                         if (ins->opcode == OP_FBGT_UN) {
2956                                 guchar *is_not_zero_check, *end_jump;
2957                                 is_not_zero_check = code;
2958                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2959                                 end_jump = code;
2960                                 x86_jump8 (code, 0);
2961                                 x86_patch (is_not_zero_check, code);
2962                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2963
2964                                 x86_patch (end_jump, code);
2965                         }
2966                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2967                         break;
2968                 case OP_FBGE:
2969                         /* Branch if C013 == 100 or 001 */
2970                         if (cfg->opt & MONO_OPT_FCMOV) {
2971                                 guchar *br1;
2972
2973                                 /* skip branch if C1=1 */
2974                                 br1 = code;
2975                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2976                                 /* branch if (C0 | C3) = 1 */
2977                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
2978                                 x86_patch (br1, code);
2979                                 break;
2980                         }
2981                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2982                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2983                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2984                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2985                         break;
2986                 case OP_FBGE_UN:
2987                         /* Branch if C013 == 000 */
2988                         if (cfg->opt & MONO_OPT_FCMOV) {
2989                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
2990                                 break;
2991                         }
2992                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2993                         break;
2994                 case OP_FBLE:
2995                         /* Branch if C013=000 or 100 */
2996                         if (cfg->opt & MONO_OPT_FCMOV) {
2997                                 guchar *br1;
2998
2999                                 /* skip branch if C1=1 */
3000                                 br1 = code;
3001                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3002                                 /* branch if C0=0 */
3003                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3004                                 x86_patch (br1, code);
3005                                 break;
3006                         }
3007                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3008                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3009                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3010                         break;
3011                 case OP_FBLE_UN:
3012                         /* Branch if C013 != 001 */
3013                         if (cfg->opt & MONO_OPT_FCMOV) {
3014                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3015                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3016                                 break;
3017                         }
3018                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3019                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3020                         break;
3021                 case CEE_CKFINITE: {
3022                         x86_push_reg (code, X86_EAX);
3023                         x86_fxam (code);
3024                         x86_fnstsw (code);
3025                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3026                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3027                         x86_pop_reg (code, X86_EAX);
3028                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3029                         break;
3030                 }
3031                 case OP_TLS_GET: {
3032                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3033                         break;
3034                 }
3035                 case OP_MEMORY_BARRIER: {
3036                         /* Not needed on x86 */
3037                         break;
3038                 }
3039                 case OP_ATOMIC_ADD_I4: {
3040                         int dreg = ins->dreg;
3041
3042                         if (dreg == ins->inst_basereg) {
3043                                 x86_push_reg (code, ins->sreg2);
3044                                 dreg = ins->sreg2;
3045                         } 
3046                         
3047                         if (dreg != ins->sreg2)
3048                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3049
3050                         x86_prefix (code, X86_LOCK_PREFIX);
3051                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3052
3053                         if (dreg != ins->dreg) {
3054                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3055                                 x86_pop_reg (code, dreg);
3056                         }
3057
3058                         break;
3059                 }
3060                 case OP_ATOMIC_ADD_NEW_I4: {
3061                         int dreg = ins->dreg;
3062
3063                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3064                         if (ins->sreg2 == dreg) {
3065                                 if (dreg == X86_EBX) {
3066                                         dreg = X86_EDI;
3067                                         if (ins->inst_basereg == X86_EDI)
3068                                                 dreg = X86_ESI;
3069                                 } else {
3070                                         dreg = X86_EBX;
3071                                         if (ins->inst_basereg == X86_EBX)
3072                                                 dreg = X86_EDI;
3073                                 }
3074                         } else if (ins->inst_basereg == dreg) {
3075                                 if (dreg == X86_EBX) {
3076                                         dreg = X86_EDI;
3077                                         if (ins->sreg2 == X86_EDI)
3078                                                 dreg = X86_ESI;
3079                                 } else {
3080                                         dreg = X86_EBX;
3081                                         if (ins->sreg2 == X86_EBX)
3082                                                 dreg = X86_EDI;
3083                                 }
3084                         }
3085
3086                         if (dreg != ins->dreg) {
3087                                 x86_push_reg (code, dreg);
3088                         }
3089
3090                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3091                         x86_prefix (code, X86_LOCK_PREFIX);
3092                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3093                         /* dreg contains the old value, add with sreg2 value */
3094                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3095                         
3096                         if (ins->dreg != dreg) {
3097                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3098                                 x86_pop_reg (code, dreg);
3099                         }
3100
3101                         break;
3102                 }
3103                 case OP_ATOMIC_EXCHANGE_I4: {
3104                         guchar *br[2];
3105                         int sreg2 = ins->sreg2;
3106                         int breg = ins->inst_basereg;
3107
3108                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3109                          * hack to overcome limits in x86 reg allocator 
3110                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3111                          */
3112                         if (ins->dreg != X86_EAX)
3113                                 x86_push_reg (code, X86_EAX);
3114                         
3115                         /* We need the EAX reg for the cmpxchg */
3116                         if (ins->sreg2 == X86_EAX) {
3117                                 x86_push_reg (code, X86_EDX);
3118                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3119                                 sreg2 = X86_EDX;
3120                         }
3121
3122                         if (breg == X86_EAX) {
3123                                 x86_push_reg (code, X86_ESI);
3124                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3125                                 breg = X86_ESI;
3126                         }
3127
3128                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3129
3130                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3131                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3132                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3133                         x86_patch (br [1], br [0]);
3134
3135                         if (breg != ins->inst_basereg)
3136                                 x86_pop_reg (code, X86_ESI);
3137
3138                         if (ins->dreg != X86_EAX) {
3139                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3140                                 x86_pop_reg (code, X86_EAX);
3141                         }
3142
3143                         if (ins->sreg2 != sreg2)
3144                                 x86_pop_reg (code, X86_EDX);
3145
3146                         break;
3147                 }
3148                 default:
3149                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3150                         g_assert_not_reached ();
3151                 }
3152
3153                 if ((code - cfg->native_code - offset) > max_len) {
3154                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3155                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3156                         g_assert_not_reached ();
3157                 }
3158                
3159                 cpos += max_len;
3160
3161                 last_ins = ins;
3162                 last_offset = offset;
3163                 
3164                 ins = ins->next;
3165         }
3166
3167         cfg->code_len = code - cfg->native_code;
3168 }
3169
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Takes no arguments and returns nothing.  On x86 the body is empty, so
 * calling this function performs no work.
 */
void
mono_arch_register_lowlevel_calls (void)
{
        /* intentionally empty */
}
3174
3175 void
3176 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3177 {
3178         MonoJumpInfo *patch_info;
3179         gboolean compile_aot = !run_cctors;
3180
3181         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3182                 unsigned char *ip = patch_info->ip.i + code;
3183                 const unsigned char *target;
3184
3185                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3186
3187                 if (compile_aot) {
3188                         switch (patch_info->type) {
3189                         case MONO_PATCH_INFO_BB:
3190                         case MONO_PATCH_INFO_LABEL:
3191                                 break;
3192                         default:
3193                                 /* No need to patch these */
3194                                 continue;
3195                         }
3196                 }
3197
3198                 switch (patch_info->type) {
3199                 case MONO_PATCH_INFO_IP:
3200                         *((gconstpointer *)(ip)) = target;
3201                         break;
3202                 case MONO_PATCH_INFO_CLASS_INIT: {
3203                         guint8 *code = ip;
3204                         /* Might already been changed to a nop */
3205                         x86_call_code (code, 0);
3206                         x86_patch (ip, target);
3207                         break;
3208                 }
3209                 case MONO_PATCH_INFO_ABS:
3210                 case MONO_PATCH_INFO_METHOD:
3211                 case MONO_PATCH_INFO_METHOD_JUMP:
3212                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3213                 case MONO_PATCH_INFO_BB:
3214                 case MONO_PATCH_INFO_LABEL:
3215                         x86_patch (ip, target);
3216                         break;
3217                 case MONO_PATCH_INFO_NONE:
3218                         break;
3219                 default: {
3220                         guint32 offset = mono_arch_get_patch_offset (ip);
3221                         *((gconstpointer *)(ip + offset)) = target;
3222                         break;
3223                 }
3224                 }
3225         }
3226 }
3227
3228 guint8 *
3229 mono_arch_emit_prolog (MonoCompile *cfg)
3230 {
3231         MonoMethod *method = cfg->method;
3232         MonoBasicBlock *bb;
3233         MonoMethodSignature *sig;
3234         MonoInst *inst;
3235         int alloc_size, pos, max_offset, i;
3236         guint8 *code;
3237
3238         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3239         code = cfg->native_code = g_malloc (cfg->code_size);
3240
3241         x86_push_reg (code, X86_EBP);
3242         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3243
3244         alloc_size = cfg->stack_offset;
3245         pos = 0;
3246
3247         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3248                 /* Might need to attach the thread to the JIT */
3249                 if (lmf_tls_offset != -1) {
3250                         guint8 *buf;
3251
3252                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3253                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3254                         buf = code;
3255                         x86_branch8 (code, X86_CC_NE, 0, 0);
3256                         x86_push_imm (code, cfg->domain);
3257                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3258                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3259                         x86_patch (buf, code);
3260 #ifdef PLATFORM_WIN32
3261                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3262                         /* FIXME: Add a separate key for LMF to avoid this */
3263                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3264 #endif
3265                 } else {
3266                         g_assert (!cfg->compile_aot);
3267                         x86_push_imm (code, cfg->domain);
3268                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3269                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3270                 }
3271         }
3272
3273         if (method->save_lmf) {
3274                 pos += sizeof (MonoLMF);
3275
3276                 /* save the current IP */
3277                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3278                 x86_push_imm_template (code);
3279
3280                 /* save all caller saved regs */
3281                 x86_push_reg (code, X86_EBP);
3282                 x86_push_reg (code, X86_ESI);
3283                 x86_push_reg (code, X86_EDI);
3284                 x86_push_reg (code, X86_EBX);
3285
3286                 /* save method info */
3287                 x86_push_imm (code, method);
3288
3289                 /* get the address of lmf for the current thread */
3290                 /* 
3291                  * This is performance critical so we try to use some tricks to make
3292                  * it fast.
3293                  */
3294                 if (lmf_tls_offset != -1) {
3295                         /* Load lmf quicky using the GS register */
3296                         code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
3297 #ifdef PLATFORM_WIN32
3298                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3299                         /* FIXME: Add a separate key for LMF to avoid this */
3300                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3301 #endif
3302                 } else {
3303                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3304                 }
3305
3306                 /* push lmf */
3307                 x86_push_reg (code, X86_EAX); 
3308                 /* push *lfm (previous_lmf) */
3309                 x86_push_membase (code, X86_EAX, 0);
3310                 /* *(lmf) = ESP */
3311                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3312         } else {
3313
3314                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3315                         x86_push_reg (code, X86_EBX);
3316                         pos += 4;
3317                 }
3318
3319                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3320                         x86_push_reg (code, X86_EDI);
3321                         pos += 4;
3322                 }
3323
3324                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3325                         x86_push_reg (code, X86_ESI);
3326                         pos += 4;
3327                 }
3328         }
3329
3330         alloc_size -= pos;
3331
3332 #if __APPLE__
3333         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3334         {
3335                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3336                 if (tot & 4) {
3337                         tot += 4;
3338                         alloc_size += 4;
3339                 }
3340                 if (tot & 8) {
3341                         alloc_size += 8;
3342                 }
3343         }
3344 #endif
3345
3346         if (alloc_size) {
3347                 /* See mono_emit_stack_alloc */
3348 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3349                 guint32 remaining_size = alloc_size;
3350                 while (remaining_size >= 0x1000) {
3351                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3352                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3353                         remaining_size -= 0x1000;
3354                 }
3355                 if (remaining_size)
3356                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3357 #else
3358                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3359 #endif
3360         }
3361
3362 #if __APPLE_
3363         /* check the stack is aligned */
3364         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3365         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3366         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3367         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3368         x86_breakpoint (code);
3369 #endif
3370
3371         /* compute max_offset in order to use short forward jumps */
3372         max_offset = 0;
3373         if (cfg->opt & MONO_OPT_BRANCH) {
3374                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3375                         MonoInst *ins = bb->code;
3376                         bb->max_offset = max_offset;
3377
3378                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3379                                 max_offset += 6;
3380                         /* max alignment for loops */
3381                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3382                                 max_offset += LOOP_ALIGNMENT;
3383
3384                         while (ins) {
3385                                 if (ins->opcode == OP_LABEL)
3386                                         ins->inst_c1 = max_offset;
3387                                 
3388                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3389                                 ins = ins->next;
3390                         }
3391                 }
3392         }
3393
3394         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3395                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3396
3397         /* load arguments allocated to register from the stack */
3398         sig = mono_method_signature (method);
3399         pos = 0;
3400
3401         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3402                 inst = cfg->varinfo [pos];
3403                 if (inst->opcode == OP_REGVAR) {
3404                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3405                         if (cfg->verbose_level > 2)
3406                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3407                 }
3408                 pos++;
3409         }
3410
3411         cfg->code_len = code - cfg->native_code;
3412
3413         return code;
3414 }
3415
3416 void
3417 mono_arch_emit_epilog (MonoCompile *cfg)
3418 {
3419         MonoMethod *method = cfg->method;
3420         MonoMethodSignature *sig = mono_method_signature (method);
3421         int quad, pos;
3422         guint32 stack_to_pop;
3423         guint8 *code;
3424         int max_epilog_size = 16;
3425         CallInfo *cinfo;
3426         
3427         if (cfg->method->save_lmf)
3428                 max_epilog_size += 128;
3429         
3430         if (mono_jit_trace_calls != NULL)
3431                 max_epilog_size += 50;
3432
3433         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3434                 cfg->code_size *= 2;
3435                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3436                 mono_jit_stats.code_reallocs++;
3437         }
3438
3439         code = cfg->native_code + cfg->code_len;
3440
3441         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3442                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3443
3444         /* the code restoring the registers must be kept in sync with CEE_JMP */
3445         pos = 0;
3446         
3447         if (method->save_lmf) {
3448                 gint32 prev_lmf_reg;
3449                 gint32 lmf_offset = -sizeof (MonoLMF);
3450
3451                 /* Find a spare register */
3452                 switch (sig->ret->type) {
3453                 case MONO_TYPE_I8:
3454                 case MONO_TYPE_U8:
3455                         prev_lmf_reg = X86_EDI;
3456                         cfg->used_int_regs |= (1 << X86_EDI);
3457                         break;
3458                 default:
3459                         prev_lmf_reg = X86_EDX;
3460                         break;
3461                 }
3462
3463                 /* reg = previous_lmf */
3464                 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3465
3466                 /* ecx = lmf */
3467                 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
3468
3469                 /* *(lmf) = previous_lmf */
3470                 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3471
3472                 /* restore caller saved regs */
3473                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3474                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
3475                 }
3476
3477                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3478                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
3479                 }
3480                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3481                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
3482                 }
3483
3484                 /* EBP is restored by LEAVE */
3485         } else {
3486                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3487                         pos -= 4;
3488                 }
3489                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3490                         pos -= 4;
3491                 }
3492                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3493                         pos -= 4;
3494                 }
3495
3496                 if (pos)
3497                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3498
3499                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3500                         x86_pop_reg (code, X86_ESI);
3501                 }
3502                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3503                         x86_pop_reg (code, X86_EDI);
3504                 }
3505                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3506                         x86_pop_reg (code, X86_EBX);
3507                 }
3508         }
3509
3510         /* Load returned vtypes into registers if needed */
3511         cinfo = get_call_info (sig, FALSE);
3512         if (cinfo->ret.storage == ArgValuetypeInReg) {
3513                 for (quad = 0; quad < 2; quad ++) {
3514                         switch (cinfo->ret.pair_storage [quad]) {
3515                         case ArgInIReg:
3516                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
3517                                 break;
3518                         case ArgOnFloatFpStack:
3519                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
3520                                 break;
3521                         case ArgOnDoubleFpStack:
3522                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
3523                                 break;
3524                         case ArgNone:
3525                                 break;
3526                         default:
3527                                 g_assert_not_reached ();
3528                         }
3529                 }
3530         }
3531
3532         x86_leave (code);
3533
3534         if (CALLCONV_IS_STDCALL (sig)) {
3535                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3536
3537                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3538         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
3539                 stack_to_pop = 4;
3540         else
3541                 stack_to_pop = 0;
3542
3543         if (stack_to_pop)
3544                 x86_ret_imm (code, stack_to_pop);
3545         else
3546                 x86_ret (code);
3547
3548         g_free (cinfo);
3549
3550         cfg->code_len = code - cfg->native_code;
3551
3552         g_assert (cfg->code_len < cfg->code_size);
3553 }
3554
3555 void
3556 mono_arch_emit_exceptions (MonoCompile *cfg)
3557 {
3558         MonoJumpInfo *patch_info;
3559         int nthrows, i;
3560         guint8 *code;
3561         MonoClass *exc_classes [16];
3562         guint8 *exc_throw_start [16], *exc_throw_end [16];
3563         guint32 code_size;
3564         int exc_count = 0;
3565
3566         /* Compute needed space */
3567         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3568                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3569                         exc_count++;
3570         }
3571
3572         /* 
3573          * make sure we have enough space for exceptions
3574          * 16 is the size of two push_imm instructions and a call
3575          */
3576         if (cfg->compile_aot)
3577                 code_size = exc_count * 32;
3578         else
3579                 code_size = exc_count * 16;
3580
3581         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
3582                 cfg->code_size *= 2;
3583                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3584                 mono_jit_stats.code_reallocs++;
3585         }
3586
3587         code = cfg->native_code + cfg->code_len;
3588
3589         nthrows = 0;
3590         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3591                 switch (patch_info->type) {
3592                 case MONO_PATCH_INFO_EXC: {
3593                         MonoClass *exc_class;
3594                         guint8 *buf, *buf2;
3595                         guint32 throw_ip;
3596
3597                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3598
3599                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
3600                         g_assert (exc_class);
3601                         throw_ip = patch_info->ip.i;
3602
3603                         /* Find a throw sequence for the same exception class */
3604                         for (i = 0; i < nthrows; ++i)
3605                                 if (exc_classes [i] == exc_class)
3606                                         break;
3607                         if (i < nthrows) {
3608                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
3609                                 x86_jump_code (code, exc_throw_start [i]);
3610                                 patch_info->type = MONO_PATCH_INFO_NONE;
3611                         }
3612                         else {
3613                                 guint32 size;
3614
3615                                 /* Compute size of code following the push <OFFSET> */
3616                                 size = 5 + 5;
3617
3618                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
3619                                         /* Use the shorter form */
3620                                         buf = buf2 = code;
3621                                         x86_push_imm (code, 0);
3622                                 }
3623                                 else {
3624                                         buf = code;
3625                                         x86_push_imm (code, 0xf0f0f0f0);
3626                                         buf2 = code;
3627                                 }
3628
3629                                 if (nthrows < 16) {
3630                                         exc_classes [nthrows] = exc_class;
3631                                         exc_throw_start [nthrows] = code;
3632                                 }
3633
3634                                 x86_push_imm (code, exc_class->type_token);
3635                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
3636                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
3637                                 patch_info->ip.i = code - cfg->native_code;
3638                                 x86_call_code (code, 0);
3639                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
3640                                 while (buf < buf2)
3641                                         x86_nop (buf);
3642
3643                                 if (nthrows < 16) {
3644                                         exc_throw_end [nthrows] = code;
3645                                         nthrows ++;
3646                                 }
3647                         }
3648                         break;
3649                 }
3650                 default:
3651                         /* do nothing */
3652                         break;
3653                 }
3654         }
3655
3656         cfg->code_len = code - cfg->native_code;
3657
3658         g_assert (cfg->code_len < cfg->code_size);
3659 }
3660
3661 void
3662 mono_arch_flush_icache (guint8 *code, gint size)
3663 {
3664         /* not needed */
3665 }
3666
/*
 * mono_arch_flush_register_windows:
 *
 *   Intentionally a no-op in this backend.
 */
void
mono_arch_flush_register_windows (void)
{
	/* nothing to do */
}
3671
3672 /*
3673  * Support for fast access to the thread-local lmf structure using the GS
3674  * segment register on NPTL + kernel 2.6.x.
3675  */
3676
3677 static gboolean tls_offset_inited = FALSE;
3678
3679 void
3680 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
3681 {
3682         if (!tls_offset_inited) {
3683                 if (!getenv ("MONO_NO_TLS")) {
3684 #ifdef PLATFORM_WIN32
3685                         /* 
3686                          * We need to init this multiple times, since when we are first called, the key might not
3687                          * be initialized yet.
3688                          */
3689                         appdomain_tls_offset = mono_domain_get_tls_key ();
3690                         lmf_tls_offset = mono_get_jit_tls_key ();
3691                         thread_tls_offset = mono_thread_get_tls_key ();
3692
3693                         /* Only 64 tls entries can be accessed using inline code */
3694                         if (appdomain_tls_offset >= 64)
3695                                 appdomain_tls_offset = -1;
3696                         if (lmf_tls_offset >= 64)
3697                                 lmf_tls_offset = -1;
3698                         if (thread_tls_offset >= 64)
3699                                 thread_tls_offset = -1;
3700 #else
3701 #if MONO_XEN_OPT
3702                         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
3703 #endif
3704                         tls_offset_inited = TRUE;
3705                         appdomain_tls_offset = mono_domain_get_tls_offset ();
3706                         lmf_tls_offset = mono_get_lmf_tls_offset ();
3707                         thread_tls_offset = mono_thread_get_tls_offset ();
3708 #endif
3709                 }
3710         }               
3711 }
3712
3713 void
3714 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
3715 {
3716 }
3717
3718 void
3719 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3720 {
3721         MonoCallInst *call = (MonoCallInst*)inst;
3722         CallInfo *cinfo = get_call_info (inst->signature, FALSE);
3723
3724         /* add the this argument */
3725         if (this_reg != -1) {
3726                 if (cinfo->args [0].storage == ArgInIReg) {
3727                         MonoInst *this;
3728                         MONO_INST_NEW (cfg, this, OP_MOVE);
3729                         this->type = this_type;
3730                         this->sreg1 = this_reg;
3731                         this->dreg = mono_regstate_next_int (cfg->rs);
3732                         mono_bblock_add_inst (cfg->cbb, this);
3733
3734                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
3735                 }
3736                 else {
3737                         MonoInst *this;
3738                         MONO_INST_NEW (cfg, this, OP_OUTARG);
3739                         this->type = this_type;
3740                         this->sreg1 = this_reg;
3741                         mono_bblock_add_inst (cfg->cbb, this);
3742                 }
3743         }
3744
3745         if (vt_reg != -1) {
3746                 MonoInst *vtarg;
3747
3748                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3749                         /*
3750                          * The valuetype is in EAX:EDX after the call, needs to be copied to
3751                          * the stack. Save the address here, so the call instruction can
3752                          * access it.
3753                          */
3754                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
3755                         vtarg->inst_destbasereg = X86_ESP;
3756                         vtarg->inst_offset = inst->stack_usage;
3757                         vtarg->sreg1 = vt_reg;
3758                         mono_bblock_add_inst (cfg->cbb, vtarg);
3759                 }
3760                 else if (cinfo->ret.storage == ArgInIReg) {
3761                         /* The return address is passed in a register */
3762                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
3763                         vtarg->sreg1 = vt_reg;
3764                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
3765                         mono_bblock_add_inst (cfg->cbb, vtarg);
3766
3767                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
3768                 } else {
3769                         MonoInst *vtarg;
3770                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3771                         vtarg->type = STACK_MP;
3772                         vtarg->sreg1 = vt_reg;
3773                         mono_bblock_add_inst (cfg->cbb, vtarg);
3774                 }
3775         }
3776
3777         g_free (cinfo);
3778 }
3779
3780 MonoInst*
3781 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3782 {
3783         MonoInst *ins = NULL;
3784
3785         if (cmethod->klass == mono_defaults.math_class) {
3786                 if (strcmp (cmethod->name, "Sin") == 0) {
3787                         MONO_INST_NEW (cfg, ins, OP_SIN);
3788                         ins->inst_i0 = args [0];
3789                 } else if (strcmp (cmethod->name, "Cos") == 0) {
3790                         MONO_INST_NEW (cfg, ins, OP_COS);
3791                         ins->inst_i0 = args [0];
3792                 } else if (strcmp (cmethod->name, "Tan") == 0) {
3793                         MONO_INST_NEW (cfg, ins, OP_TAN);
3794                         ins->inst_i0 = args [0];
3795                 } else if (strcmp (cmethod->name, "Atan") == 0) {
3796                         MONO_INST_NEW (cfg, ins, OP_ATAN);
3797                         ins->inst_i0 = args [0];
3798                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
3799                         MONO_INST_NEW (cfg, ins, OP_SQRT);
3800                         ins->inst_i0 = args [0];
3801                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
3802                         MONO_INST_NEW (cfg, ins, OP_ABS);
3803                         ins->inst_i0 = args [0];
3804                 }
3805 #if 0
3806                 /* OP_FREM is not IEEE compatible */
3807                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
3808                         MONO_INST_NEW (cfg, ins, OP_FREM);
3809                         ins->inst_i0 = args [0];
3810                         ins->inst_i1 = args [1];
3811                 }
3812 #endif
3813         } else if (cmethod->klass == mono_defaults.thread_class &&
3814                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
3815                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
3816         } else if(cmethod->klass->image == mono_defaults.corlib &&
3817                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
3818                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
3819
3820                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3821                         MonoInst *ins_iconst;
3822
3823                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3824                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3825                         ins_iconst->inst_c0 = 1;
3826
3827                         ins->inst_i0 = args [0];
3828                         ins->inst_i1 = ins_iconst;
3829                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3830                         MonoInst *ins_iconst;
3831
3832                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3833                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3834                         ins_iconst->inst_c0 = -1;
3835
3836                         ins->inst_i0 = args [0];
3837                         ins->inst_i1 = ins_iconst;
3838                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3839                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
3840
3841                         ins->inst_i0 = args [0];
3842                         ins->inst_i1 = args [1];
3843                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3844                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3845
3846                         ins->inst_i0 = args [0];
3847                         ins->inst_i1 = args [1];
3848                 }
3849         }
3850
3851         return ins;
3852 }
3853
3854
3855 gboolean
3856 mono_arch_print_tree (MonoInst *tree, int arity)
3857 {
3858         return 0;
3859 }
3860
3861 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3862 {
3863         MonoInst* ins;
3864         
3865         if (appdomain_tls_offset == -1)
3866                 return NULL;
3867
3868         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3869         ins->inst_offset = appdomain_tls_offset;
3870         return ins;
3871 }
3872
3873 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3874 {
3875         MonoInst* ins;
3876
3877         if (thread_tls_offset == -1)
3878                 return NULL;
3879
3880         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3881         ins->inst_offset = thread_tls_offset;
3882         return ins;
3883 }
3884
3885 guint32
3886 mono_arch_get_patch_offset (guint8 *code)
3887 {
3888         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3889                 return 2;
3890         else if ((code [0] == 0xba))
3891                 return 1;
3892         else if ((code [0] == 0x68))
3893                 /* push IMM */
3894                 return 1;
3895         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3896                 /* push <OFFSET>(<REG>) */
3897                 return 2;
3898         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
3899                 /* call *<OFFSET>(<REG>) */
3900                 return 2;
3901         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
3902                 /* fldl <ADDR> */
3903                 return 2;
3904         else if ((code [0] == 0x58) && (code [1] == 0x05))
3905                 /* pop %eax; add <OFFSET>, %eax */
3906                 return 2;
3907         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
3908                 /* pop <REG>; add <OFFSET>, <REG> */
3909                 return 3;
3910         else {
3911                 g_assert_not_reached ();
3912                 return -1;
3913         }
3914 }
3915
3916 gpointer*
3917 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
3918 {
3919         guint8 reg = 0;
3920         gint32 disp = 0;
3921
3922         /* go to the start of the call instruction
3923          *
3924          * address_byte = (m << 6) | (o << 3) | reg
3925          * call opcode: 0xff address_byte displacement
3926          * 0xff m=1,o=2 imm8
3927          * 0xff m=2,o=2 imm32
3928          */
3929         code -= 6;
3930
3931         /* 
3932          * A given byte sequence can match more than case here, so we have to be
3933          * really careful about the ordering of the cases. Longer sequences
3934          * come first.
3935          */
3936         if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
3937                 /*
3938                  * This is an interface call
3939                  * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
3940                  * ff 10                   call   *(%eax)
3941                  */
3942                 reg = x86_modrm_rm (code [5]);
3943                 disp = 0;
3944         } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
3945                 reg = code [4] & 0x07;
3946                 disp = (signed char)code [5];
3947         } else {
3948                 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
3949                         reg = code [1] & 0x07;
3950                         disp = *((gint32*)(code + 2));
3951                 } else if ((code [1] == 0xe8)) {
3952                         return NULL;
3953                 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
3954                         /*
3955                          * This is a interface call
3956                          * 8b 40 30   mov    0x30(%eax),%eax
3957                          * ff 10      call   *(%eax)
3958                          */
3959                         disp = 0;
3960                         reg = code [5] & 0x07;
3961                 }
3962                 else
3963                         return NULL;
3964         }
3965
3966         return (gpointer*)(((gint32)(regs [reg])) + disp);
3967 }
3968
3969 gpointer* 
3970 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
3971 {
3972         guint8 reg = 0;
3973         gint32 disp = 0;
3974
3975         code -= 7;
3976         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
3977                 reg = x86_modrm_rm (code [1]);
3978                 disp = code [4];
3979
3980                 if (reg == X86_EAX)
3981                         return NULL;
3982                 else
3983                         return (gpointer*)(((gint32)(regs [reg])) + disp);
3984         }
3985
3986         return NULL;
3987 }