/* mono.git: mono/mini/mini-x86.c */
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #include <unistd.h>
15
16 #include <mono/metadata/appdomain.h>
17 #include <mono/metadata/debug-helpers.h>
18 #include <mono/metadata/threads.h>
19 #include <mono/metadata/profiler-private.h>
20 #include <mono/utils/mono-math.h>
21
22 #include "trace.h"
23 #include "mini-x86.h"
24 #include "inssel.h"
25 #include "cpu-x86.h"
26
/* On windows, these hold the key returned by TlsAlloc () */
/* NOTE(review): on non-windows builds these presumably hold TLS offsets
 * instead of TlsAlloc keys — confirm against the platform-specific setup. */
static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
/* TRUE by default until we add runtime detection of Xen */
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

/* Round VAL up to the next multiple of ALIGN (ALIGN must be a power of two). */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

/* Offset of the first incoming argument from EBP: saved EBP + return address. */
#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

#define NOT_IMPLEMENTED g_assert_not_reached ()
51
52 const char*
53 mono_arch_regname (int reg) {
54         switch (reg) {
55         case X86_EAX: return "%eax";
56         case X86_EBX: return "%ebx";
57         case X86_ECX: return "%ecx";
58         case X86_EDX: return "%edx";
59         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
60         case X86_EDI: return "%edi";
61         case X86_ESI: return "%esi";
62         }
63         return "unknown";
64 }
65
/*
 * mono_arch_fregname:
 * This backend assigns no symbolic names to fp registers, so every
 * register number maps to the placeholder string.
 */
const char*
mono_arch_fregname (int reg) {
	return "unknown";
}
70
/* Where a single argument or return value lives during a call. */
typedef enum {
	ArgInIReg,           /* in a general purpose register */
	ArgInFloatSSEReg,    /* in an SSE register as a float (unused: FLOAT_PARAM_REGS is 0) */
	ArgInDoubleSSEReg,   /* in an SSE register as a double (unused: FLOAT_PARAM_REGS is 0) */
	ArgOnStack,          /* in a stack slot */
	ArgValuetypeInReg,   /* value type split across pair_storage/pair_regs (win32 returns) */
	ArgOnFloatFpStack,   /* on the x87 fp stack, single precision */
	ArgOnDoubleFpStack,  /* on the x87 fp stack, double precision */
	ArgNone              /* no storage (void return) */
} ArgStorage;
81
/* Placement of one argument or return value. */
typedef struct {
	gint16 offset;       /* stack offset, valid when storage == ArgOnStack */
	gint8  reg;          /* register number, valid when storage is a register kind */
	ArgStorage storage;  /* where the value lives */

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2]; /* storage of each half of the value type */
	gint8 pair_regs [2];         /* register of each half when ArgInIReg */
} ArgInfo;
91
/* Full description of a call site's argument layout (built by get_call_info). */
typedef struct {
	int nargs;                  /* number of entries in args[] */
	guint32 stack_usage;        /* total stack space used by the arguments */
	guint32 reg_usage;          /* number of integer registers consumed */
	guint32 freg_usage;         /* number of fp registers consumed */
	gboolean need_stack_align;  /* whether extra padding is needed (Apple 16-byte rule) */
	guint32 stack_align_amount; /* the padding amount when need_stack_align is set */
	ArgInfo ret;                /* placement of the return value */
	ArgInfo sig_cookie;         /* placement of the vararg signature cookie */
	ArgInfo args [1];           /* variable length: nargs entries allocated */
} CallInfo;
103
/* Number of integer registers used for argument passing (none on x86). */
#define PARAM_REGS 0

/* Number of SSE registers used for fp argument passing (none on x86). */
#define FLOAT_PARAM_REGS 0

/* Placeholder table; never indexed because PARAM_REGS is 0. */
static X86_Reg_No param_regs [] = { 0 };

#ifdef PLATFORM_WIN32
/* Register pair used to return small value types on win32 (EAX, then EDX). */
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
113
114 static void inline
115 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
116 {
117     ainfo->offset = *stack_size;
118
119     if (*gr >= PARAM_REGS) {
120                 ainfo->storage = ArgOnStack;
121                 (*stack_size) += sizeof (gpointer);
122     }
123     else {
124                 ainfo->storage = ArgInIReg;
125                 ainfo->reg = param_regs [*gr];
126                 (*gr) ++;
127     }
128 }
129
130 static void inline
131 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
132 {
133         ainfo->offset = *stack_size;
134
135         g_assert (PARAM_REGS == 0);
136         
137         ainfo->storage = ArgOnStack;
138         (*stack_size) += sizeof (gpointer) * 2;
139 }
140
141 static void inline
142 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
143 {
144     ainfo->offset = *stack_size;
145
146     if (*gr >= FLOAT_PARAM_REGS) {
147                 ainfo->storage = ArgOnStack;
148                 (*stack_size) += is_double ? 8 : 4;
149     }
150     else {
151                 /* A double register */
152                 if (is_double)
153                         ainfo->storage = ArgInDoubleSSEReg;
154                 else
155                         ainfo->storage = ArgInFloatSSEReg;
156                 ainfo->reg = *gr;
157                 (*gr) += 1;
158     }
159 }
160
161
/*
 * add_valuetype:
 * Compute the placement of a value-type argument or return value.
 * On win32, small pinvoke struct returns are placed in registers or on
 * the x87 stack; everything else is passed on the stack, rounded up to
 * pointer size.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
               gboolean is_return,
               guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* Native layout for pinvoke signatures, managed layout otherwise. */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* Structs of 1, 2, 4 or 8 bytes come back in EAX (and EDX for 8). */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* Default: pass on the stack, rounded up to pointer alignment. */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
217
218 /*
219  * get_call_info:
220  *
221  *  Obtain information about a call according to the calling convention.
222  * For x86 ELF, see the "System V Application Binary Interface Intel386 
 * Architecture Processor Supplement, Fourth Edition" document for more
224  * information.
225  * For x86 win32, see ???.
226  */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
	/* NOTE(review): the is_pinvoke parameter is not read here — callers
	 * always pass FALSE; sig->pinvoke is consulted instead. */
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	/* CallInfo already declares args[1], so this over-allocates one
	 * ArgInfo; zero-filled so unset fields default to 0/ArgInIReg. */
	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mono_type_get_underlying_type (sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			/* Scalars and references come back in EAX. */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			/* 64-bit values come back in EAX:EDX; only EAX is recorded. */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			;
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	/* Vararg call with no fixed parameters: cookie goes first. */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/* 
			 * Prevent implicit arguments + the sig cookie from being passed 
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		/* Byref arguments are pointers regardless of the pointee type. */
		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	/* Vararg call whose sentinel sits after the last fixed parameter:
	 * the cookie was not emitted inside the loop, so emit it here. */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

#if defined(__APPLE__)
	/* Darwin requires 16-byte stack alignment at call sites. */
	if ((stack_size % 16) != 0) { 
		cinfo->need_stack_align = TRUE;
		stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
	}
#endif

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
414
415 /*
416  * mono_arch_get_argument_info:
417  * @csig:  a method signature
418  * @param_count: the number of parameters to consider
419  * @arg_info: an array to store the result infos
420  *
421  * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries. 
423  *
424  * Returns the size of the activation frame.
425  */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, pad;
	guint32 align;
	/* Arguments start at EBP + 8 (saved EBP + return address). */
	int offset = 8;
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	/* A struct return passed by hidden pointer takes one slot first. */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* Entry 0 describes the hidden arguments (return slot + this). */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else {
			int ialign;
			size = mono_type_stack_size (csig->params [k], &ialign);
			align = ialign;
		}

		/* ignore alignment for now */
		align = 1;

		/* pad is the fill needed to align frame_size; with align == 1
		 * it is always 0, but the bookkeeping is kept for later use. */
		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* Final pad to the architecture frame alignment, recorded on the
	 * last entry (k == param_count here). */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
482
/*
 * Pre-assembled machine code with the signature of CpuidFunc: it runs the
 * CPUID instruction for the requested function id and stores EAX/EBX/ECX/EDX
 * through the four out-pointers.  It is copied into executable memory at
 * runtime (see cpuid ()) instead of using inline asm.
 */
static const guchar cpuid_impl [] = {
	0x55,                           /* push   %ebp */
	0x89, 0xe5,                     /* mov    %esp,%ebp */
	0x53,                           /* push   %ebx */
	0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                     /* cpuid   */
	0x50,                           /* push   %eax */
	0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
	0x89, 0x18,                     /* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
	0x89, 0x08,                     /* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
	0x89, 0x10,                     /* mov    %edx,(%eax) */
	0x58,                           /* pop    %eax */
	0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
	0x89, 0x02,                     /* mov    %eax,(%edx) */
	0x5b,                           /* pop    %ebx */
	0xc9,                           /* leave   */
	0xc3,                           /* ret     */
};
503
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);

/*
 * cpuid:
 * Run CPUID function ID, storing the four result registers through the
 * out-pointers.  Returns 1 on success, 0 when the CPU lacks CPUID.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
	/* Detect CPUID support by trying to toggle EFLAGS bit 21 (ID). */
#ifndef _MSC_VER
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
565
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	/* Set the x87 control word precision field to 53-bit (double). */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	/* MSVC equivalent: 53-bit precision control */
	_control87 (_PC_53, MCW_PC);
#endif
}
585
586 /*
587  * This function returns the optimizations supported on this cpu.
588  */
589 guint32
590 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
591 {
592         int eax, ebx, ecx, edx;
593         guint32 opts = 0;
594         
595         *exclude_mask = 0;
596         /* Feature Flags function, flags returned in EDX. */
597         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
598                 if (edx & (1 << 15)) {
599                         opts |= MONO_OPT_CMOV;
600                         if (edx & 1)
601                                 opts |= MONO_OPT_FCMOV;
602                         else
603                                 *exclude_mask |= MONO_OPT_FCMOV;
604                 } else
605                         *exclude_mask |= MONO_OPT_CMOV;
606         }
607         return opts;
608 }
609
610 /*
 * Determine whether the trap whose info is in SIGINFO was caused by
 * integer overflow.
613  */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* Opcode 0xf7 with mod==3 and reg==7 is "idiv r32". */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		/* x86 raises the same #DE trap for division by zero and for
		 * INT_MIN / -1; a divisor of -1 means this was an overflow. */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
658
659 static gboolean
660 is_regsize_var (MonoType *t) {
661         if (t->byref)
662                 return TRUE;
663         switch (mono_type_get_underlying_type (t)->type) {
664         case MONO_TYPE_I4:
665         case MONO_TYPE_U4:
666         case MONO_TYPE_I:
667         case MONO_TYPE_U:
668         case MONO_TYPE_PTR:
669         case MONO_TYPE_FNPTR:
670                 return TRUE;
671         case MONO_TYPE_OBJECT:
672         case MONO_TYPE_STRING:
673         case MONO_TYPE_CLASS:
674         case MONO_TYPE_SZARRAY:
675         case MONO_TYPE_ARRAY:
676                 return TRUE;
677         case MONO_TYPE_GENERICINST:
678                 if (!mono_type_generic_inst_is_valuetype (t))
679                         return TRUE;
680                 return FALSE;
681         case MONO_TYPE_VALUETYPE:
682                 return FALSE;
683         }
684         return FALSE;
685 }
686
/*
 * Collect the variables of CFG that are eligible for integer register
 * allocation, sorted by mono_varlist_sort.
 */
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		/* Skip dead, volatile and address-taken variables, and anything
		 * that is not a local or an argument. */
		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* We don't allocate I1 to registers because there is no simple way
		 * to sign-extend 8-bit quantities in caller-saved registers on x86. */
		if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
		    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
		    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
720
721 GList *
722 mono_arch_get_global_int_regs (MonoCompile *cfg)
723 {
724         GList *regs = NULL;
725
726         /* we can use 3 registers for global allocation */
727         regs = g_list_prepend (regs, (gpointer)X86_EBX);
728         regs = g_list_prepend (regs, (gpointer)X86_ESI);
729         regs = g_list_prepend (regs, (gpointer)X86_EDI);
730
731         return regs;
732 }
733
734 /*
735  * mono_arch_regalloc_cost:
736  *
737  *  Return the cost, in number of memory references, of the action of 
738  * allocating the variable VMV into a register during global register
739  * allocation.
740  */
741 guint32
742 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
743 {
744         MonoInst *ins = cfg->varinfo [vmv->idx];
745
746         if (cfg->method->save_lmf)
747                 /* The register is already saved */
748                 return (ins->opcode == OP_ARG) ? 1 : 0;
749         else
750                 /* push+pop+possible load if it is an argument */
751                 return (ins->opcode == OP_ARG) ? 3 : 2;
752 }
753  
754 /*
755  * Set var information according to the calling convention. X86 version.
756  * The locals var stuff should most likely be split in another method.
757  */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (sig, FALSE);

	cfg->frame_reg = MONO_ARCH_BASEREG;
	/* offset accumulates the negative-from-EBP frame size. */
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		/* One 4-byte slot per used callee-saved register. */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* Round offset up to the locals' alignment. */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* The vtype return buffer address is an implicit first argument. */
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	/* Incoming arguments live above EBP at their call-time offsets. */
	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->varinfo [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	/* Round the final frame size up to the architecture alignment. */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	cfg->stack_offset = offset;

	g_free (cinfo);
}
872
873 void
874 mono_arch_create_vars (MonoCompile *cfg)
875 {
876         MonoMethodSignature *sig;
877         CallInfo *cinfo;
878
879         sig = mono_method_signature (cfg->method);
880
881         cinfo = get_call_info (sig, FALSE);
882
883         if (cinfo->ret.storage == ArgValuetypeInReg)
884                 cfg->ret_var_is_local = TRUE;
885
886         g_free (cinfo);
887 }
888
/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently the alignment in mono_arch_call_opcode is computed without using
 * arch_get_argument_info.
 */
892
893 static void
894 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
895 {
896         MonoInst *arg;
897         MonoMethodSignature *tmp_sig;
898         MonoInst *sig_arg;
899
900         /* FIXME: Add support for signature tokens to AOT */
901         cfg->disable_aot = TRUE;
902         MONO_INST_NEW (cfg, arg, OP_OUTARG);
903
904         /*
905          * mono_ArgIterator_Setup assumes the signature cookie is 
906          * passed first and all the arguments which were before it are
907          * passed on the stack after the signature. So compensate by 
908          * passing a different signature.
909          */
910         tmp_sig = mono_metadata_signature_dup (call->signature);
911         tmp_sig->param_count -= call->signature->sentinelpos;
912         tmp_sig->sentinelpos = 0;
913         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
914
915         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
916         sig_arg->inst_p0 = tmp_sig;
917
918         arg->inst_left = sig_arg;
919         arg->type = STACK_PTR;
920         /* prepend, so they get reversed */
921         arg->next = call->out_args;
922         call->out_args = arg;
923 }
924
925 /* 
926  * take the arguments and generate the arch-specific
927  * instructions to properly call the function in call.
928  * This includes pushing, moving arguments to the right register
929  * etc.
930  */
931 MonoCallInst*
932 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
933         MonoInst *arg, *in;
934         MonoMethodSignature *sig;
935         int i, n;
936         CallInfo *cinfo;
937         int sentinelpos = 0;
938
939         sig = call->signature;
940         n = sig->param_count + sig->hasthis;
941
942         cinfo = get_call_info (sig, FALSE);
943
944         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
945                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
946
947         for (i = 0; i < n; ++i) {
948                 ArgInfo *ainfo = cinfo->args + i;
949
950                 /* Emit the signature cookie just before the implicit arguments */
951                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
952                         emit_sig_cookie (cfg, call);
953                 }
954
955                 if (is_virtual && i == 0) {
956                         /* the argument will be attached to the call instrucion */
957                         in = call->args [i];
958                 } else {
959                         MonoType *t;
960
961                         if (i >= sig->hasthis)
962                                 t = sig->params [i - sig->hasthis];
963                         else
964                                 t = &mono_defaults.int_class->byval_arg;
965                         t = mono_type_get_underlying_type (t);
966
967                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
968                         in = call->args [i];
969                         arg->cil_code = in->cil_code;
970                         arg->inst_left = in;
971                         arg->type = in->type;
972                         /* prepend, so they get reversed */
973                         arg->next = call->out_args;
974                         call->out_args = arg;
975
976                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
977                                 guint32 size, align;
978
979                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
980                                         size = sizeof (MonoTypedRef);
981                                         align = sizeof (gpointer);
982                                 }
983                                 else
984                                         if (sig->pinvoke)
985                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
986                                         else {
987                                                 int ialign;
988                                                 size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
989                                                 align = ialign;
990                                         }
991                                 arg->opcode = OP_OUTARG_VT;
992                                 arg->klass = in->klass;
993                                 arg->backend.is_pinvoke = sig->pinvoke;
994                                 arg->inst_imm = size; 
995                         }
996                         else {
997                                 switch (ainfo->storage) {
998                                 case ArgOnStack:
999                                         arg->opcode = OP_OUTARG;
1000                                         if (!t->byref) {
1001                                                 if (t->type == MONO_TYPE_R4)
1002                                                         arg->opcode = OP_OUTARG_R4;
1003                                                 else
1004                                                         if (t->type == MONO_TYPE_R8)
1005                                                                 arg->opcode = OP_OUTARG_R8;
1006                                         }
1007                                         break;
1008                                 default:
1009                                         g_assert_not_reached ();
1010                                 }
1011                         }
1012                 }
1013         }
1014
1015         /* Handle the case where there are no implicit arguments */
1016         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
1017                 emit_sig_cookie (cfg, call);
1018         }
1019
1020         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1021                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1022                         MonoInst *zero_inst;
1023                         /*
1024                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1025                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1026                          * before calling the function. So we add a dummy instruction to represent pushing the 
1027                          * struct return address to the stack. The return address will be saved to this stack slot 
1028                          * by the code emitted in this_vret_args.
1029                          */
1030                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1031                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1032                         zero_inst->inst_p0 = 0;
1033                         arg->inst_left = zero_inst;
1034                         arg->type = STACK_PTR;
1035                         /* prepend, so they get reversed */
1036                         arg->next = call->out_args;
1037                         call->out_args = arg;
1038                 }
1039                 else
1040                         /* if the function returns a struct, the called method already does a ret $0x4 */
1041                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1042                                 cinfo->stack_usage -= 4;
1043         }
1044         
1045         call->stack_usage = cinfo->stack_usage;
1046
1047 #if defined(__APPLE__)
1048         if (cinfo->need_stack_align) {
1049                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1050                 arg->inst_c0 = cinfo->stack_align_amount;
1051                 arg->next = call->out_args;
1052                 call->out_args = arg;
1053         }
1054 #endif 
1055
1056         g_free (cinfo);
1057
1058         return call;
1059 }
1060
1061 /*
1062  * Allow tracing to work with this interface (with an optional argument)
1063  */
1064 void*
1065 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1066 {
1067         guchar *code = p;
1068
1069         /* if some args are passed in registers, we need to save them here */
1070         x86_push_reg (code, X86_EBP);
1071
1072         if (cfg->compile_aot) {
1073                 x86_push_imm (code, cfg->method);
1074                 x86_mov_reg_imm (code, X86_EAX, func);
1075                 x86_call_reg (code, X86_EAX);
1076         } else {
1077                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1078                 x86_push_imm (code, cfg->method);
1079                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1080                 x86_call_code (code, 0);
1081         }
1082         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1083
1084         return code;
1085 }
1086
/* How much of the return value mono_arch_instrument_epilog must preserve
 * around the trace callback (derived from the method's return type). */
enum {
	SAVE_NONE,
	SAVE_STRUCT,	/* valuetype return; its address is the first stack arg at 8(%ebp) */
	SAVE_EAX,	/* 32 bit integer / pointer return in EAX */
	SAVE_EAX_EDX,	/* 64 bit integer return in EAX:EDX */
	SAVE_FP		/* R4/R8 return on the x87 stack */
};
1094
1095 void*
1096 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1097 {
1098         guchar *code = p;
1099         int arg_size = 0, save_mode = SAVE_NONE;
1100         MonoMethod *method = cfg->method;
1101         
1102         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1103         case MONO_TYPE_VOID:
1104                 /* special case string .ctor icall */
1105                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1106                         save_mode = SAVE_EAX;
1107                 else
1108                         save_mode = SAVE_NONE;
1109                 break;
1110         case MONO_TYPE_I8:
1111         case MONO_TYPE_U8:
1112                 save_mode = SAVE_EAX_EDX;
1113                 break;
1114         case MONO_TYPE_R4:
1115         case MONO_TYPE_R8:
1116                 save_mode = SAVE_FP;
1117                 break;
1118         case MONO_TYPE_GENERICINST:
1119                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1120                         save_mode = SAVE_EAX;
1121                         break;
1122                 }
1123                 /* Fall through */
1124         case MONO_TYPE_VALUETYPE:
1125                 save_mode = SAVE_STRUCT;
1126                 break;
1127         default:
1128                 save_mode = SAVE_EAX;
1129                 break;
1130         }
1131
1132         switch (save_mode) {
1133         case SAVE_EAX_EDX:
1134                 x86_push_reg (code, X86_EDX);
1135                 x86_push_reg (code, X86_EAX);
1136                 if (enable_arguments) {
1137                         x86_push_reg (code, X86_EDX);
1138                         x86_push_reg (code, X86_EAX);
1139                         arg_size = 8;
1140                 }
1141                 break;
1142         case SAVE_EAX:
1143                 x86_push_reg (code, X86_EAX);
1144                 if (enable_arguments) {
1145                         x86_push_reg (code, X86_EAX);
1146                         arg_size = 4;
1147                 }
1148                 break;
1149         case SAVE_FP:
1150                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1151                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1152                 if (enable_arguments) {
1153                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1154                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1155                         arg_size = 8;
1156                 }
1157                 break;
1158         case SAVE_STRUCT:
1159                 if (enable_arguments) {
1160                         x86_push_membase (code, X86_EBP, 8);
1161                         arg_size = 4;
1162                 }
1163                 break;
1164         case SAVE_NONE:
1165         default:
1166                 break;
1167         }
1168
1169         if (cfg->compile_aot) {
1170                 x86_push_imm (code, method);
1171                 x86_mov_reg_imm (code, X86_EAX, func);
1172                 x86_call_reg (code, X86_EAX);
1173         } else {
1174                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1175                 x86_push_imm (code, method);
1176                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1177                 x86_call_code (code, 0);
1178         }
1179         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1180
1181         switch (save_mode) {
1182         case SAVE_EAX_EDX:
1183                 x86_pop_reg (code, X86_EAX);
1184                 x86_pop_reg (code, X86_EDX);
1185                 break;
1186         case SAVE_EAX:
1187                 x86_pop_reg (code, X86_EAX);
1188                 break;
1189         case SAVE_FP:
1190                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1191                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1192                 break;
1193         case SAVE_NONE:
1194         default:
1195                 break;
1196         }
1197
1198         return code;
1199 }
1200
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch to the target of INS.  If the target offset is
 * already known, branch directly; otherwise record a patch and emit a branch
 * with a 0 displacement to be fixed up later.  With MONO_OPT_BRANCH enabled,
 * a short (8 bit) form is used when the estimated distance fits in an imm8.
 * Relies on 'code', 'cfg' and 'cpos' being in scope at the expansion site.
 * Wrapped in do/while so it behaves as a single statement (no dangling-else
 * hazard, requires a terminating semicolon).
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
do { \
	if (ins->flags & MONO_INST_BRLABEL) { \
		if (ins->inst_i0->inst_c0) { \
			x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
		} else { \
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
			if ((cfg->opt & MONO_OPT_BRANCH) && \
			    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
				x86_branch8 (code, cond, 0, sign); \
			else \
				x86_branch32 (code, cond, 0, sign); \
		} \
	} else { \
		if (ins->inst_true_bb->native_offset) { \
			x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
		} else { \
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
			if ((cfg->opt & MONO_OPT_BRANCH) && \
			    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
				x86_branch8 (code, cond, 0, sign); \
			else \
				x86_branch32 (code, cond, 0, sign); \
		} \
	} \
} while (0)
1225
1226 /*  
1227  *      Emit an exception if condition is fail and
1228  *  if possible do a directly branch to target 
1229  */
1230 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
1231         do {                                                        \
1232                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
1233                 if (tins == NULL) {                                                                             \
1234                         mono_add_patch_info (cfg, code - cfg->native_code,   \
1235                                         MONO_PATCH_INFO_EXC, exc_name);  \
1236                         x86_branch32 (code, cond, 0, signed);               \
1237                 } else {        \
1238                         EMIT_COND_BRANCH (tins, cond, signed);  \
1239                 }                       \
1240         } while (0); 
1241
/* Compare the two topmost x87 stack values (popping both) and copy the FPU
 * status word into AX for inspection of the condition bits. */
#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0)
1246
1247
/* Emit a call whose target is resolved later: record a patch of PATCH_TYPE
 * at the call site, then emit a call with a 0 displacement for the patch
 * machinery to rewrite.  Returns the advanced code pointer. */
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}
1256
/* FIXME: Add more instructions */
/* Opcodes which do not read the x86 condition flags; the peephole pass uses
 * this to decide when an ICONST 0 may be turned into a flag-clobbering XOR. */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1259
1260 static void
1261 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1262 {
1263         MonoInst *ins, *last_ins = NULL;
1264         ins = bb->code;
1265
1266         while (ins) {
1267
1268                 switch (ins->opcode) {
1269                 case OP_ICONST:
1270                         /* reg = 0 -> XOR (reg, reg) */
1271                         /* XOR sets cflags on x86, so we cant do it always */
1272                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1273                                 ins->opcode = CEE_XOR;
1274                                 ins->sreg1 = ins->dreg;
1275                                 ins->sreg2 = ins->dreg;
1276                         }
1277                         break;
1278                 case OP_MUL_IMM: 
1279                         /* remove unnecessary multiplication with 1 */
1280                         if (ins->inst_imm == 1) {
1281                                 if (ins->dreg != ins->sreg1) {
1282                                         ins->opcode = OP_MOVE;
1283                                 } else {
1284                                         last_ins->next = ins->next;
1285                                         ins = ins->next;
1286                                         continue;
1287                                 }
1288                         }
1289                         break;
1290                 case OP_COMPARE_IMM:
1291                         /* OP_COMPARE_IMM (reg, 0) 
1292                          * --> 
1293                          * OP_X86_TEST_NULL (reg) 
1294                          */
1295                         if (!ins->inst_imm)
1296                                 ins->opcode = OP_X86_TEST_NULL;
1297                         break;
1298                 case OP_X86_COMPARE_MEMBASE_IMM:
1299                         /* 
1300                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1301                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1302                          * -->
1303                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1304                          * OP_COMPARE_IMM reg, imm
1305                          *
1306                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1307                          */
1308                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1309                             ins->inst_basereg == last_ins->inst_destbasereg &&
1310                             ins->inst_offset == last_ins->inst_offset) {
1311                                         ins->opcode = OP_COMPARE_IMM;
1312                                         ins->sreg1 = last_ins->sreg1;
1313
1314                                         /* check if we can remove cmp reg,0 with test null */
1315                                         if (!ins->inst_imm)
1316                                                 ins->opcode = OP_X86_TEST_NULL;
1317                                 }
1318
1319                         break;
1320                 case OP_LOAD_MEMBASE:
1321                 case OP_LOADI4_MEMBASE:
1322                         /* 
1323                          * Note: if reg1 = reg2 the load op is removed
1324                          *
1325                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1326                          * OP_LOAD_MEMBASE offset(basereg), reg2
1327                          * -->
1328                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1329                          * OP_MOVE reg1, reg2
1330                          */
1331                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1332                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1333                             ins->inst_basereg == last_ins->inst_destbasereg &&
1334                             ins->inst_offset == last_ins->inst_offset) {
1335                                 if (ins->dreg == last_ins->sreg1) {
1336                                         last_ins->next = ins->next;                             
1337                                         ins = ins->next;                                
1338                                         continue;
1339                                 } else {
1340                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1341                                         ins->opcode = OP_MOVE;
1342                                         ins->sreg1 = last_ins->sreg1;
1343                                 }
1344
1345                         /* 
1346                          * Note: reg1 must be different from the basereg in the second load
1347                          * Note: if reg1 = reg2 is equal then second load is removed
1348                          *
1349                          * OP_LOAD_MEMBASE offset(basereg), reg1
1350                          * OP_LOAD_MEMBASE offset(basereg), reg2
1351                          * -->
1352                          * OP_LOAD_MEMBASE offset(basereg), reg1
1353                          * OP_MOVE reg1, reg2
1354                          */
1355                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1356                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1357                               ins->inst_basereg != last_ins->dreg &&
1358                               ins->inst_basereg == last_ins->inst_basereg &&
1359                               ins->inst_offset == last_ins->inst_offset) {
1360
1361                                 if (ins->dreg == last_ins->dreg) {
1362                                         last_ins->next = ins->next;                             
1363                                         ins = ins->next;                                
1364                                         continue;
1365                                 } else {
1366                                         ins->opcode = OP_MOVE;
1367                                         ins->sreg1 = last_ins->dreg;
1368                                 }
1369
1370                                 //g_assert_not_reached ();
1371
1372 #if 0
1373                         /* 
1374                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1375                          * OP_LOAD_MEMBASE offset(basereg), reg
1376                          * -->
1377                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1378                          * OP_ICONST reg, imm
1379                          */
1380                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1381                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1382                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1383                                    ins->inst_offset == last_ins->inst_offset) {
1384                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1385                                 ins->opcode = OP_ICONST;
1386                                 ins->inst_c0 = last_ins->inst_imm;
1387                                 g_assert_not_reached (); // check this rule
1388 #endif
1389                         }
1390                         break;
1391                 case OP_LOADU1_MEMBASE:
1392                 case OP_LOADI1_MEMBASE:
1393                         /* 
1394                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1395                          * OP_LOAD_MEMBASE offset(basereg), reg2
1396                          * -->
1397                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1398                          * CONV_I2/U2 reg1, reg2
1399                          */
1400                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1401                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1402                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1403                                         ins->inst_offset == last_ins->inst_offset) {
1404                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1405                                 ins->sreg1 = last_ins->sreg1;
1406                         }
1407                         break;
1408                 case OP_LOADU2_MEMBASE:
1409                 case OP_LOADI2_MEMBASE:
1410                         /* 
1411                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1412                          * OP_LOAD_MEMBASE offset(basereg), reg2
1413                          * -->
1414                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1415                          * CONV_I2/U2 reg1, reg2
1416                          */
1417                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1418                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1419                                         ins->inst_offset == last_ins->inst_offset) {
1420                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1421                                 ins->sreg1 = last_ins->sreg1;
1422                         }
1423                         break;
1424                 case CEE_CONV_I4:
1425                 case CEE_CONV_U4:
1426                 case OP_MOVE:
1427                         /*
1428                          * Removes:
1429                          *
1430                          * OP_MOVE reg, reg 
1431                          */
1432                         if (ins->dreg == ins->sreg1) {
1433                                 if (last_ins)
1434                                         last_ins->next = ins->next;                             
1435                                 ins = ins->next;
1436                                 continue;
1437                         }
1438                         /* 
1439                          * Removes:
1440                          *
1441                          * OP_MOVE sreg, dreg 
1442                          * OP_MOVE dreg, sreg
1443                          */
1444                         if (last_ins && last_ins->opcode == OP_MOVE &&
1445                             ins->sreg1 == last_ins->dreg &&
1446                             ins->dreg == last_ins->sreg1) {
1447                                 last_ins->next = ins->next;                             
1448                                 ins = ins->next;                                
1449                                 continue;
1450                         }
1451                         break;
1452                         
1453                 case OP_X86_PUSH_MEMBASE:
1454                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1455                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1456                             ins->inst_basereg == last_ins->inst_destbasereg &&
1457                             ins->inst_offset == last_ins->inst_offset) {
1458                                     ins->opcode = OP_X86_PUSH;
1459                                     ins->sreg1 = last_ins->sreg1;
1460                         }
1461                         break;
1462                 }
1463                 last_ins = ins;
1464                 ins = ins->next;
1465         }
1466         bb->last_ins = last_ins;
1467 }
1468
/* x86 condition codes for the relational opcodes: one row of signed
 * compares, one row of their unsigned/unordered counterparts, then
 * overflow/no-overflow and carry/no-carry.  Presumably indexed by
 * (opcode - first branch opcode) — verify against its users. */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Machine description table from cpu-x86.h, consumed by the local register
 * allocator. */
static const char*const * ins_spec = x86_desc;
1477
1478 /*#include "cprop.c"*/
/* Arch entry point for local register allocation: delegate to the generic
 * local allocator for basic block BB. */
void
mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
{
	mono_local_regalloc (cfg, bb);
}
1484
/*
 * emit_float_to_int:
 *
 *   Emit code converting the value on top of the x87 stack to a SIZE byte
 * integer in DREG with truncation semantics: the FPU control word's rounding
 * control field is temporarily forced to round-toward-zero (the 0xc00 bits)
 * and restored afterwards.  For sizes 1 and 2 the result is widened
 * (sign- or zero-extended per IS_SIGNED) back to 32 bits.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
	/* Spill the current control word, set RC = 11b (truncate), reload it. */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		/* NOTE(review): this 8 byte path looks incomplete (see FIXME):
		 * only the low dword is popped, so ESP is left 4 bytes short and
		 * the control word restore below reads the wrong slot.  Presumably
		 * unreachable — verify callers never pass size == 8. */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* Restore the saved control word and release its stack slot. */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
1515
/*
 * mono_emit_stack_alloc:
 *
 *   Emit native code implementing localloc: grow the stack by the byte
 * count held in TREE->sreg1.  When the MONO_INST_INIT flag is set, the
 * newly allocated area is also zero-initialized with a rep stosl.
 * Returns the updated code pointer.
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

	/* Platforms with a guard page below the committed stack must commit
	 * pages one at a time by touching them (see the long comment below). */
#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently commited.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		/* Size <= one page (no bits above 0xFFF set)? Then skip the probe loop. */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop head: allocate and touch one 4K page per iteration */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		/* Touch the new page so the OS commits it (test reads [esp]) */
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/* 
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			/* Save scratch regs, then zero the page with rep stosl */
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
			/* +12 skips the three saved registers pushed just above */
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		/* Loop while at least one full page remains to allocate */
		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		/* Allocate the sub-page remainder, if any */
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		/* Small-size fast path: single esp adjustment, no probing */
		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		/* Save EAX/ECX/EDI unless they hold the dreg or sreg; track how
		 * many bytes of saves sit above the allocated area. */
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}
		
		/* Convert byte count to dword count for rep stosl (clobbers sreg) */
		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
		
		/* EDI points past the saved registers, at the allocated area */
		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);
		
		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
1622
1623
1624 static guint8*
1625 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1626 {
1627         CallInfo *cinfo;
1628         int quad;
1629
1630         /* Move return value to the target register */
1631         switch (ins->opcode) {
1632         case CEE_CALL:
1633         case OP_CALL_REG:
1634         case OP_CALL_MEMBASE:
1635                 if (ins->dreg != X86_EAX)
1636                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1637                 break;
1638         case OP_VCALL:
1639         case OP_VCALL_REG:
1640         case OP_VCALL_MEMBASE:
1641                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
1642                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1643                         /* Pop the destination address from the stack */
1644                         x86_pop_reg (code, X86_ECX);
1645                         
1646                         for (quad = 0; quad < 2; quad ++) {
1647                                 switch (cinfo->ret.pair_storage [quad]) {
1648                                 case ArgInIReg:
1649                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1650                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1651                                         break;
1652                                 case ArgNone:
1653                                         break;
1654                                 default:
1655                                         g_assert_not_reached ();
1656                                 }
1657                         }
1658                 }
1659                 g_free (cinfo);
1660         default:
1661                 break;
1662         }
1663
1664         return code;
1665 }
1666
/*
 * emit_tls_get:
 * @code: buffer to store code to
 * @dreg: hard register where to place the result
 * @tls_offset: offset info
 *
 * emit_tls_get emits in @code the native code that puts in the dreg register
 * the item in the thread local storage identified by tls_offset.
 *
 * Returns: a pointer to the end of the stored code
 */
static guint8*
emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/* 
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	/* Only the first 64 TLS slots live inline in the TEB */
	g_assert (tls_offset < 64);
	x86_prefix (code, X86_FS_PREFIX);
	/* fs:[0x18] -- presumably the TEB self-pointer; TODO confirm against TEB layout */
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	/* 3600 == 0xE10 -- looks like the TlsSlots array inside the TEB; verify */
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	if (optimize_for_xen) {
		/* Indirect: load the TLS block pointer from gs:[0], then index it.
		 * NOTE(review): presumably avoids relying on a gs segment base that
		 * Xen cannot provide -- confirm against the Xen port notes. */
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, 0, 4);
		x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
	} else {
		/* Direct load from gs:[tls_offset] */
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, tls_offset, 4);
	}
#endif
	return code;
}
1704
/*
 * REAL_PRINT_REG: debugging helper.  Emits code (into the local `code`
 * buffer) that calls printf at runtime with the register index and the
 * register's current value; EAX/EDX/ECX are saved and restored around
 * the call.  NOTE(review): the macro is not wrapped in do { } while (0),
 * so it expands to multiple statements -- only use it where a compound
 * statement is acceptable (not as the sole body of an unbraced if).
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
1719
/* benchmark and set based on cpu */
/* Byte alignment applied to loop-head basic blocks (see MONO_OPT_LOOP below) */
#define LOOP_ALIGNMENT 8
/* A block worth aligning: starts a loop body and sits inside at least one loop */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
1723
1724 void
1725 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1726 {
1727         MonoInst *ins;
1728         MonoCallInst *call;
1729         guint offset;
1730         guint8 *code = cfg->native_code + cfg->code_len;
1731         MonoInst *last_ins = NULL;
1732         guint last_offset = 0;
1733         int max_len, cpos;
1734
1735         if (cfg->opt & MONO_OPT_PEEPHOLE)
1736                 peephole_pass (cfg, bb);
1737
1738         if (cfg->opt & MONO_OPT_LOOP) {
1739                 int pad, align = LOOP_ALIGNMENT;
1740                 /* set alignment depending on cpu */
1741                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
1742                         pad = align - pad;
1743                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
1744                         x86_padding (code, pad);
1745                         cfg->code_len += pad;
1746                         bb->native_offset = cfg->code_len;
1747                 }
1748         }
1749
1750         if (cfg->verbose_level > 2)
1751                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1752
1753         cpos = bb->max_offset;
1754
1755         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1756                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1757                 g_assert (!cfg->compile_aot);
1758                 cpos += 6;
1759
1760                 cov->data [bb->dfn].cil_code = bb->cil_code;
1761                 /* this is not thread save, but good enough */
1762                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1763         }
1764
1765         offset = code - cfg->native_code;
1766
1767         mono_debug_open_block (cfg, bb, offset);
1768
1769         ins = bb->code;
1770         while (ins) {
1771                 offset = code - cfg->native_code;
1772
1773                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
1774
1775                 if (offset > (cfg->code_size - max_len - 16)) {
1776                         cfg->code_size *= 2;
1777                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1778                         code = cfg->native_code + offset;
1779                         mono_jit_stats.code_reallocs++;
1780                 }
1781
1782                 mono_debug_record_line_number (cfg, ins, offset);
1783
1784                 switch (ins->opcode) {
1785                 case OP_BIGMUL:
1786                         x86_mul_reg (code, ins->sreg2, TRUE);
1787                         break;
1788                 case OP_BIGMUL_UN:
1789                         x86_mul_reg (code, ins->sreg2, FALSE);
1790                         break;
1791                 case OP_X86_SETEQ_MEMBASE:
1792                 case OP_X86_SETNE_MEMBASE:
1793                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
1794                                          ins->inst_basereg, ins->inst_offset, TRUE);
1795                         break;
1796                 case OP_STOREI1_MEMBASE_IMM:
1797                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1798                         break;
1799                 case OP_STOREI2_MEMBASE_IMM:
1800                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1801                         break;
1802                 case OP_STORE_MEMBASE_IMM:
1803                 case OP_STOREI4_MEMBASE_IMM:
1804                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1805                         break;
1806                 case OP_STOREI1_MEMBASE_REG:
1807                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1808                         break;
1809                 case OP_STOREI2_MEMBASE_REG:
1810                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1811                         break;
1812                 case OP_STORE_MEMBASE_REG:
1813                 case OP_STOREI4_MEMBASE_REG:
1814                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1815                         break;
1816                 case CEE_LDIND_I:
1817                 case CEE_LDIND_I4:
1818                 case CEE_LDIND_U4:
1819                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1820                         break;
1821                 case OP_LOADU4_MEM:
1822                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1823                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1824                         break;
1825                 case OP_LOAD_MEMBASE:
1826                 case OP_LOADI4_MEMBASE:
1827                 case OP_LOADU4_MEMBASE:
1828                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1829                         break;
1830                 case OP_LOADU1_MEMBASE:
1831                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1832                         break;
1833                 case OP_LOADI1_MEMBASE:
1834                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1835                         break;
1836                 case OP_LOADU2_MEMBASE:
1837                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1838                         break;
1839                 case OP_LOADI2_MEMBASE:
1840                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1841                         break;
1842                 case CEE_CONV_I1:
1843                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1844                         break;
1845                 case CEE_CONV_I2:
1846                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1847                         break;
1848                 case CEE_CONV_U1:
1849                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1850                         break;
1851                 case CEE_CONV_U2:
1852                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1853                         break;
1854                 case OP_COMPARE:
1855                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1856                         break;
1857                 case OP_COMPARE_IMM:
1858                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1859                         break;
1860                 case OP_X86_COMPARE_MEMBASE_REG:
1861                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1862                         break;
1863                 case OP_X86_COMPARE_MEMBASE_IMM:
1864                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1865                         break;
1866                 case OP_X86_COMPARE_MEMBASE8_IMM:
1867                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1868                         break;
1869                 case OP_X86_COMPARE_REG_MEMBASE:
1870                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
1871                         break;
1872                 case OP_X86_COMPARE_MEM_IMM:
1873                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
1874                         break;
1875                 case OP_X86_TEST_NULL:
1876                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
1877                         break;
1878                 case OP_X86_ADD_MEMBASE_IMM:
1879                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1880                         break;
1881                 case OP_X86_ADD_MEMBASE:
1882                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
1883                         break;
1884                 case OP_X86_SUB_MEMBASE_IMM:
1885                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1886                         break;
1887                 case OP_X86_SUB_MEMBASE:
1888                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
1889                         break;
1890                 case OP_X86_AND_MEMBASE_IMM:
1891                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1892                         break;
1893                 case OP_X86_OR_MEMBASE_IMM:
1894                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1895                         break;
1896                 case OP_X86_XOR_MEMBASE_IMM:
1897                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1898                         break;
1899                 case OP_X86_INC_MEMBASE:
1900                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
1901                         break;
1902                 case OP_X86_INC_REG:
1903                         x86_inc_reg (code, ins->dreg);
1904                         break;
1905                 case OP_X86_DEC_MEMBASE:
1906                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
1907                         break;
1908                 case OP_X86_DEC_REG:
1909                         x86_dec_reg (code, ins->dreg);
1910                         break;
1911                 case OP_X86_MUL_MEMBASE:
1912                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
1913                         break;
1914                 case CEE_BREAK:
1915                         x86_breakpoint (code);
1916                         break;
1917                 case OP_ADDCC:
1918                 case CEE_ADD:
1919                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
1920                         break;
1921                 case OP_ADC:
1922                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
1923                         break;
1924                 case OP_ADDCC_IMM:
1925                 case OP_ADD_IMM:
1926                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
1927                         break;
1928                 case OP_ADC_IMM:
1929                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
1930                         break;
1931                 case OP_SUBCC:
1932                 case CEE_SUB:
1933                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
1934                         break;
1935                 case OP_SBB:
1936                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
1937                         break;
1938                 case OP_SUBCC_IMM:
1939                 case OP_SUB_IMM:
1940                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
1941                         break;
1942                 case OP_SBB_IMM:
1943                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
1944                         break;
1945                 case CEE_AND:
1946                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
1947                         break;
1948                 case OP_AND_IMM:
1949                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
1950                         break;
1951                 case CEE_DIV:
1952                         x86_cdq (code);
1953                         x86_div_reg (code, ins->sreg2, TRUE);
1954                         break;
1955                 case CEE_DIV_UN:
1956                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1957                         x86_div_reg (code, ins->sreg2, FALSE);
1958                         break;
1959                 case OP_DIV_IMM:
1960                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1961                         x86_cdq (code);
1962                         x86_div_reg (code, ins->sreg2, TRUE);
1963                         break;
1964                 case CEE_REM:
1965                         x86_cdq (code);
1966                         x86_div_reg (code, ins->sreg2, TRUE);
1967                         break;
1968                 case CEE_REM_UN:
1969                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1970                         x86_div_reg (code, ins->sreg2, FALSE);
1971                         break;
1972                 case OP_REM_IMM:
1973                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1974                         x86_cdq (code);
1975                         x86_div_reg (code, ins->sreg2, TRUE);
1976                         break;
1977                 case CEE_OR:
1978                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
1979                         break;
1980                 case OP_OR_IMM:
1981                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
1982                         break;
1983                 case CEE_XOR:
1984                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
1985                         break;
1986                 case OP_XOR_IMM:
1987                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
1988                         break;
1989                 case CEE_SHL:
1990                         g_assert (ins->sreg2 == X86_ECX);
1991                         x86_shift_reg (code, X86_SHL, ins->dreg);
1992                         break;
1993                 case CEE_SHR:
1994                         g_assert (ins->sreg2 == X86_ECX);
1995                         x86_shift_reg (code, X86_SAR, ins->dreg);
1996                         break;
1997                 case OP_SHR_IMM:
1998                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
1999                         break;
2000                 case OP_SHR_UN_IMM:
2001                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2002                         break;
2003                 case CEE_SHR_UN:
2004                         g_assert (ins->sreg2 == X86_ECX);
2005                         x86_shift_reg (code, X86_SHR, ins->dreg);
2006                         break;
2007                 case OP_SHL_IMM:
2008                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2009                         break;
2010                 case OP_LSHL: {
2011                         guint8 *jump_to_end;
2012
2013                         /* handle shifts below 32 bits */
2014                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2015                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2016
2017                         x86_test_reg_imm (code, X86_ECX, 32);
2018                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2019
2020                         /* handle shift over 32 bit */
2021                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2022                         x86_clear_reg (code, ins->sreg1);
2023                         
2024                         x86_patch (jump_to_end, code);
2025                         }
2026                         break;
2027                 case OP_LSHR: {
2028                         guint8 *jump_to_end;
2029
2030                         /* handle shifts below 32 bits */
2031                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2032                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2033
2034                         x86_test_reg_imm (code, X86_ECX, 32);
2035                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2036
2037                         /* handle shifts over 31 bits */
2038                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2039                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2040                         
2041                         x86_patch (jump_to_end, code);
2042                         }
2043                         break;
2044                 case OP_LSHR_UN: {
2045                         guint8 *jump_to_end;
2046
2047                         /* handle shifts below 32 bits */
2048                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2049                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2050
2051                         x86_test_reg_imm (code, X86_ECX, 32);
2052                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2053
2054                         /* handle shifts over 31 bits */
2055                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2056                         x86_clear_reg (code, ins->backend.reg3);
2057                         
2058                         x86_patch (jump_to_end, code);
2059                         }
2060                         break;
2061                 case OP_LSHL_IMM:
2062                         if (ins->inst_imm >= 32) {
2063                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2064                                 x86_clear_reg (code, ins->sreg1);
2065                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2066                         } else {
2067                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2068                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2069                         }
2070                         break;
2071                 case OP_LSHR_IMM:
2072                         if (ins->inst_imm >= 32) {
2073                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2074                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2075                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2076                         } else {
2077                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2078                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2079                         }
2080                         break;
2081                 case OP_LSHR_UN_IMM:
2082                         if (ins->inst_imm >= 32) {
2083                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2084                                 x86_clear_reg (code, ins->backend.reg3);
2085                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2086                         } else {
2087                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2088                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2089                         }
2090                         break;
2091                 case CEE_NOT:
2092                         x86_not_reg (code, ins->sreg1);
2093                         break;
2094                 case CEE_NEG:
2095                         x86_neg_reg (code, ins->sreg1);
2096                         break;
2097                 case OP_SEXT_I1:
2098                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2099                         break;
2100                 case OP_SEXT_I2:
2101                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2102                         break;
2103                 case CEE_MUL:
2104                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2105                         break;
2106                 case OP_MUL_IMM:
2107                         switch (ins->inst_imm) {
2108                         case 2:
2109                                 /* MOV r1, r2 */
2110                                 /* ADD r1, r1 */
2111                                 if (ins->dreg != ins->sreg1)
2112                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2113                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2114                                 break;
2115                         case 3:
2116                                 /* LEA r1, [r2 + r2*2] */
2117                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2118                                 break;
2119                         case 5:
2120                                 /* LEA r1, [r2 + r2*4] */
2121                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2122                                 break;
2123                         case 6:
2124                                 /* LEA r1, [r2 + r2*2] */
2125                                 /* ADD r1, r1          */
2126                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2127                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2128                                 break;
2129                         case 9:
2130                                 /* LEA r1, [r2 + r2*8] */
2131                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2132                                 break;
2133                         case 10:
2134                                 /* LEA r1, [r2 + r2*4] */
2135                                 /* ADD r1, r1          */
2136                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2137                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2138                                 break;
2139                         case 12:
2140                                 /* LEA r1, [r2 + r2*2] */
2141                                 /* SHL r1, 2           */
2142                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2143                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2144                                 break;
2145                         case 25:
2146                                 /* LEA r1, [r2 + r2*4] */
2147                                 /* LEA r1, [r1 + r1*4] */
2148                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2149                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2150                                 break;
2151                         case 100:
2152                                 /* LEA r1, [r2 + r2*4] */
2153                                 /* SHL r1, 2           */
2154                                 /* LEA r1, [r1 + r1*4] */
2155                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2156                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2157                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2158                                 break;
2159                         default:
2160                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2161                                 break;
2162                         }
2163                         break;
2164                 case CEE_MUL_OVF:
2165                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2166                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2167                         break;
2168                 case CEE_MUL_OVF_UN: {
2169                         /* the mul operation and the exception check should most likely be split */
2170                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2171                         /*g_assert (ins->sreg2 == X86_EAX);
2172                         g_assert (ins->dreg == X86_EAX);*/
2173                         if (ins->sreg2 == X86_EAX) {
2174                                 non_eax_reg = ins->sreg1;
2175                         } else if (ins->sreg1 == X86_EAX) {
2176                                 non_eax_reg = ins->sreg2;
2177                         } else {
2178                                 /* no need to save since we're going to store to it anyway */
2179                                 if (ins->dreg != X86_EAX) {
2180                                         saved_eax = TRUE;
2181                                         x86_push_reg (code, X86_EAX);
2182                                 }
2183                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2184                                 non_eax_reg = ins->sreg2;
2185                         }
2186                         if (ins->dreg == X86_EDX) {
2187                                 if (!saved_eax) {
2188                                         saved_eax = TRUE;
2189                                         x86_push_reg (code, X86_EAX);
2190                                 }
2191                         } else if (ins->dreg != X86_EAX) {
2192                                 saved_edx = TRUE;
2193                                 x86_push_reg (code, X86_EDX);
2194                         }
2195                         x86_mul_reg (code, non_eax_reg, FALSE);
2196                         /* save before the check since pop and mov don't change the flags */
2197                         if (ins->dreg != X86_EAX)
2198                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2199                         if (saved_edx)
2200                                 x86_pop_reg (code, X86_EDX);
2201                         if (saved_eax)
2202                                 x86_pop_reg (code, X86_EAX);
2203                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2204                         break;
2205                 }
2206                 case OP_ICONST:
2207                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2208                         break;
2209                 case OP_AOTCONST:
2210                         g_assert_not_reached ();
2211                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2212                         x86_mov_reg_imm (code, ins->dreg, 0);
2213                         break;
2214                 case OP_LOAD_GOTADDR:
2215                         x86_call_imm (code, 0);
2216                         /* 
2217                          * The patch needs to point to the pop, since the GOT offset needs 
2218                          * to be added to that address.
2219                          */
2220                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2221                         x86_pop_reg (code, ins->dreg);
2222                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2223                         break;
2224                 case OP_GOT_ENTRY:
2225                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2226                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2227                         break;
2228                 case OP_X86_PUSH_GOT_ENTRY:
2229                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2230                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2231                         break;
2232                 case CEE_CONV_I4:
2233                 case OP_MOVE:
2234                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2235                         break;
2236                 case CEE_CONV_U4:
2237                         g_assert_not_reached ();
2238                 case CEE_JMP: {
2239                         /*
2240                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2241                          * Keep in sync with the code in emit_epilog.
2242                          */
2243                         int pos = 0;
2244
2245                         /* FIXME: no tracing support... */
2246                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2247                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2248                         /* reset offset to make max_len work */
2249                         offset = code - cfg->native_code;
2250
2251                         g_assert (!cfg->method->save_lmf);
2252
2253                         if (cfg->used_int_regs & (1 << X86_EBX))
2254                                 pos -= 4;
2255                         if (cfg->used_int_regs & (1 << X86_EDI))
2256                                 pos -= 4;
2257                         if (cfg->used_int_regs & (1 << X86_ESI))
2258                                 pos -= 4;
2259                         if (pos)
2260                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2261         
2262                         if (cfg->used_int_regs & (1 << X86_ESI))
2263                                 x86_pop_reg (code, X86_ESI);
2264                         if (cfg->used_int_regs & (1 << X86_EDI))
2265                                 x86_pop_reg (code, X86_EDI);
2266                         if (cfg->used_int_regs & (1 << X86_EBX))
2267                                 x86_pop_reg (code, X86_EBX);
2268         
2269                         /* restore ESP/EBP */
2270                         x86_leave (code);
2271                         offset = code - cfg->native_code;
2272                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2273                         x86_jump32 (code, 0);
2274                         break;
2275                 }
2276                 case OP_CHECK_THIS:
2277                         /* ensure ins->sreg1 is not NULL
2278                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2279                          * cmp DWORD PTR [eax], 0
2280                          */
2281                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2282                         break;
2283                 case OP_ARGLIST: {
2284                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2285                         x86_push_reg (code, hreg);
2286                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2287                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2288                         x86_pop_reg (code, hreg);
2289                         break;
2290                 }
2291                 case OP_FCALL:
2292                 case OP_LCALL:
2293                 case OP_VCALL:
2294                 case OP_VOIDCALL:
2295                 case CEE_CALL:
2296                         call = (MonoCallInst*)ins;
2297                         if (ins->flags & MONO_INST_HAS_METHOD)
2298                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2299                         else
2300                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2301                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2302                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2303                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2304                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2305                                  * smart enough to do that optimization yet
2306                                  *
2307                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2308                                 * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2309                                 * speedup (most likely from locality benefits). People with other processors should
2310                                  * check on theirs to see what happens.
2311                                  */
2312                                 if (call->stack_usage == 4) {
2313                                         /* we want to use registers that won't get used soon, so use
2314                                          * ecx, as eax will get allocated first. edx is used by long calls,
2315                                          * so we can't use that.
2316                                          */
2317                                         
2318                                         x86_pop_reg (code, X86_ECX);
2319                                 } else {
2320                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2321                                 }
2322                         }
2323                         code = emit_move_return_value (cfg, ins, code);
2324                         break;
2325                 case OP_FCALL_REG:
2326                 case OP_LCALL_REG:
2327                 case OP_VCALL_REG:
2328                 case OP_VOIDCALL_REG:
2329                 case OP_CALL_REG:
2330                         call = (MonoCallInst*)ins;
2331                         x86_call_reg (code, ins->sreg1);
2332                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2333                                 if (call->stack_usage == 4)
2334                                         x86_pop_reg (code, X86_ECX);
2335                                 else
2336                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2337                         }
2338                         code = emit_move_return_value (cfg, ins, code);
2339                         break;
2340                 case OP_FCALL_MEMBASE:
2341                 case OP_LCALL_MEMBASE:
2342                 case OP_VCALL_MEMBASE:
2343                 case OP_VOIDCALL_MEMBASE:
2344                 case OP_CALL_MEMBASE:
2345                         call = (MonoCallInst*)ins;
2346                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2347                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2348                                 if (call->stack_usage == 4)
2349                                         x86_pop_reg (code, X86_ECX);
2350                                 else
2351                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2352                         }
2353                         code = emit_move_return_value (cfg, ins, code);
2354                         break;
2355                 case OP_OUTARG:
2356                 case OP_X86_PUSH:
2357                         x86_push_reg (code, ins->sreg1);
2358                         break;
2359                 case OP_X86_PUSH_IMM:
2360                         x86_push_imm (code, ins->inst_imm);
2361                         break;
2362                 case OP_X86_PUSH_MEMBASE:
2363                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2364                         break;
2365                 case OP_X86_PUSH_OBJ: 
2366                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2367                         x86_push_reg (code, X86_EDI);
2368                         x86_push_reg (code, X86_ESI);
2369                         x86_push_reg (code, X86_ECX);
2370                         if (ins->inst_offset)
2371                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2372                         else
2373                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2374                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2375                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2376                         x86_cld (code);
2377                         x86_prefix (code, X86_REP_PREFIX);
2378                         x86_movsd (code);
2379                         x86_pop_reg (code, X86_ECX);
2380                         x86_pop_reg (code, X86_ESI);
2381                         x86_pop_reg (code, X86_EDI);
2382                         break;
2383                 case OP_X86_LEA:
2384                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2385                         break;
2386                 case OP_X86_LEA_MEMBASE:
2387                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2388                         break;
2389                 case OP_X86_XCHG:
2390                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2391                         break;
2392                 case OP_LOCALLOC:
2393                         /* keep alignment */
2394                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2395                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2396                         code = mono_emit_stack_alloc (code, ins);
2397                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2398                         break;
2399                 case CEE_RET:
2400                         x86_ret (code);
2401                         break;
2402                 case CEE_THROW: {
2403                         x86_push_reg (code, ins->sreg1);
2404                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2405                                                           (gpointer)"mono_arch_throw_exception");
2406                         break;
2407                 }
2408                 case OP_RETHROW: {
2409                         x86_push_reg (code, ins->sreg1);
2410                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2411                                                           (gpointer)"mono_arch_rethrow_exception");
2412                         break;
2413                 }
2414                 case OP_CALL_HANDLER: 
2415                         /* Align stack */
2416 #ifdef __APPLE__
2417                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2418 #endif
2419                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2420                         x86_call_imm (code, 0);
2421 #ifdef __APPLE__
2422                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2423 #endif
2424                         break;
2425                 case OP_LABEL:
2426                         ins->inst_c0 = code - cfg->native_code;
2427                         break;
2428                 case CEE_BR:
2429                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2430                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2431                         //break;
2432                         if (ins->flags & MONO_INST_BRLABEL) {
2433                                 if (ins->inst_i0->inst_c0) {
2434                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2435                                 } else {
2436                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2437                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2438                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2439                                                 x86_jump8 (code, 0);
2440                                         else 
2441                                                 x86_jump32 (code, 0);
2442                                 }
2443                         } else {
2444                                 if (ins->inst_target_bb->native_offset) {
2445                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2446                                 } else {
2447                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2448                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2449                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2450                                                 x86_jump8 (code, 0);
2451                                         else 
2452                                                 x86_jump32 (code, 0);
2453                                 } 
2454                         }
2455                         break;
2456                 case OP_BR_REG:
2457                         x86_jump_reg (code, ins->sreg1);
2458                         break;
2459                 case OP_CEQ:
2460                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2461                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2462                         break;
2463                 case OP_CLT:
2464                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2465                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2466                         break;
2467                 case OP_CLT_UN:
2468                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2469                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2470                         break;
2471                 case OP_CGT:
2472                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2473                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2474                         break;
2475                 case OP_CGT_UN:
2476                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2477                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2478                         break;
2479                 case OP_CNE:
2480                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2481                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2482                         break;
2483                 case OP_COND_EXC_EQ:
2484                 case OP_COND_EXC_NE_UN:
2485                 case OP_COND_EXC_LT:
2486                 case OP_COND_EXC_LT_UN:
2487                 case OP_COND_EXC_GT:
2488                 case OP_COND_EXC_GT_UN:
2489                 case OP_COND_EXC_GE:
2490                 case OP_COND_EXC_GE_UN:
2491                 case OP_COND_EXC_LE:
2492                 case OP_COND_EXC_LE_UN:
2493                 case OP_COND_EXC_OV:
2494                 case OP_COND_EXC_NO:
2495                 case OP_COND_EXC_C:
2496                 case OP_COND_EXC_NC:
2497                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2498                         break;
2499                 case CEE_BEQ:
2500                 case CEE_BNE_UN:
2501                 case CEE_BLT:
2502                 case CEE_BLT_UN:
2503                 case CEE_BGT:
2504                 case CEE_BGT_UN:
2505                 case CEE_BGE:
2506                 case CEE_BGE_UN:
2507                 case CEE_BLE:
2508                 case CEE_BLE_UN:
2509                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2510                         break;
2511
2512                 /* floating point opcodes */
2513                 case OP_R8CONST: {
2514                         double d = *(double *)ins->inst_p0;
2515
2516                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2517                                 x86_fldz (code);
2518                         } else if (d == 1.0) {
2519                                 x86_fld1 (code);
2520                         } else {
2521                                 if (cfg->compile_aot) {
2522                                         guint32 *val = (guint32*)&d;
2523                                         x86_push_imm (code, val [1]);
2524                                         x86_push_imm (code, val [0]);
2525                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2526                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2527                                 }
2528                                 else {
2529                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2530                                         x86_fld (code, NULL, TRUE);
2531                                 }
2532                         }
2533                         break;
2534                 }
2535                 case OP_R4CONST: {
2536                         float f = *(float *)ins->inst_p0;
2537
2538                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2539                                 x86_fldz (code);
2540                         } else if (f == 1.0) {
2541                                 x86_fld1 (code);
2542                         } else {
2543                                 if (cfg->compile_aot) {
2544                                         guint32 val = *(guint32*)&f;
2545                                         x86_push_imm (code, val);
2546                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2547                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2548                                 }
2549                                 else {
2550                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2551                                         x86_fld (code, NULL, FALSE);
2552                                 }
2553                         }
2554                         break;
2555                 }
2556                 case OP_STORER8_MEMBASE_REG:
2557                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2558                         break;
2559                 case OP_LOADR8_SPILL_MEMBASE:
2560                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2561                         x86_fxch (code, 1);
2562                         break;
2563                 case OP_LOADR8_MEMBASE:
2564                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2565                         break;
2566                 case OP_STORER4_MEMBASE_REG:
2567                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2568                         break;
2569                 case OP_LOADR4_MEMBASE:
2570                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2571                         break;
2572                 case CEE_CONV_R4: /* FIXME: change precision */
2573                 case CEE_CONV_R8:
2574                         x86_push_reg (code, ins->sreg1);
2575                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2576                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2577                         break;
2578                 case OP_X86_FP_LOAD_I8:
2579                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2580                         break;
2581                 case OP_X86_FP_LOAD_I4:
2582                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2583                         break;
2584                 case OP_FCONV_TO_I1:
2585                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2586                         break;
2587                 case OP_FCONV_TO_U1:
2588                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2589                         break;
2590                 case OP_FCONV_TO_I2:
2591                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2592                         break;
2593                 case OP_FCONV_TO_U2:
2594                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2595                         break;
2596                 case OP_FCONV_TO_I4:
2597                 case OP_FCONV_TO_I:
2598                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2599                         break;
2600                 case OP_FCONV_TO_I8:
2601                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2602                         x86_fnstcw_membase(code, X86_ESP, 0);
2603                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2604                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2605                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2606                         x86_fldcw_membase (code, X86_ESP, 2);
2607                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2608                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2609                         x86_pop_reg (code, ins->dreg);
2610                         x86_pop_reg (code, ins->backend.reg3);
2611                         x86_fldcw_membase (code, X86_ESP, 0);
2612                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2613                         break;
2614                 case OP_LCONV_TO_R_UN: { 
2615                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2616                         guint8 *br;
2617
2618                         /* load 64bit integer to FP stack */
2619                         x86_push_imm (code, 0);
2620                         x86_push_reg (code, ins->sreg2);
2621                         x86_push_reg (code, ins->sreg1);
2622                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2623                         /* store as 80bit FP value */
2624                         x86_fst80_membase (code, X86_ESP, 0);
2625                         
2626                         /* test if lreg is negative */
2627                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2628                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2629         
2630                         /* add correction constant mn */
2631                         x86_fld80_mem (code, mn);
2632                         x86_fld80_membase (code, X86_ESP, 0);
2633                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2634                         x86_fst80_membase (code, X86_ESP, 0);
2635
2636                         x86_patch (br, code);
2637
2638                         x86_fld80_membase (code, X86_ESP, 0);
2639                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2640
2641                         break;
2642                 }
2643                 case OP_LCONV_TO_OVF_I: {
2644                         guint8 *br [3], *label [1];
2645                         MonoInst *tins;
2646
2647                         /* 
2648                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2649                          */
2650                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2651
2652                         /* If the low word top bit is set, see if we are negative */
2653                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2654                         /* We are not negative (no top bit set); check for our top word to be zero */
2655                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2656                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2657                         label [0] = code;
2658
2659                         /* throw exception */
2660                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2661                         if (tins) {
2662                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2663                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2664                                         x86_jump8 (code, 0);
2665                                 else
2666                                         x86_jump32 (code, 0);
2667                         } else {
2668                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2669                                 x86_jump32 (code, 0);
2670                         }
2671         
2672         
2673                         x86_patch (br [0], code);
2674                         /* our top bit is set, check that top word is 0xffffffff */
2675                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2676                 
2677                         x86_patch (br [1], code);
2678                         /* nope, emit exception */
2679                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2680                         x86_patch (br [2], label [0]);
2681
2682                         if (ins->dreg != ins->sreg1)
2683                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2684                         break;
2685                 }
2686                 case OP_FADD:
2687                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2688                         break;
2689                 case OP_FSUB:
2690                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2691                         break;          
2692                 case OP_FMUL:
2693                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2694                         break;          
2695                 case OP_FDIV:
2696                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2697                         break;          
2698                 case OP_FNEG:
2699                         x86_fchs (code);
2700                         break;          
2701                 case OP_SIN:
2702                         x86_fsin (code);
2703                         x86_fldz (code);
2704                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2705                         break;          
2706                 case OP_COS:
2707                         x86_fcos (code);
2708                         x86_fldz (code);
2709                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2710                         break;          
2711                 case OP_ABS:
2712                         x86_fabs (code);
2713                         break;          
2714                 case OP_TAN: {
2715                         /* 
2716                          * it really doesn't make sense to inline all this code,
2717                          * it's here just to show that things may not be as simple 
2718                          * as they appear.
2719                          */
2720                         guchar *check_pos, *end_tan, *pop_jump;
2721                         x86_push_reg (code, X86_EAX);
2722                         x86_fptan (code);
2723                         x86_fnstsw (code);
2724                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2725                         check_pos = code;
2726                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2727                         x86_fstp (code, 0); /* pop the 1.0 */
2728                         end_tan = code;
2729                         x86_jump8 (code, 0);
2730                         x86_fldpi (code);
2731                         x86_fp_op (code, X86_FADD, 0);
2732                         x86_fxch (code, 1);
2733                         x86_fprem1 (code);
2734                         x86_fstsw (code);
2735                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2736                         pop_jump = code;
2737                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2738                         x86_fstp (code, 1);
2739                         x86_fptan (code);
2740                         x86_patch (pop_jump, code);
2741                         x86_fstp (code, 0); /* pop the 1.0 */
2742                         x86_patch (check_pos, code);
2743                         x86_patch (end_tan, code);
2744                         x86_fldz (code);
2745                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2746                         x86_pop_reg (code, X86_EAX);
2747                         break;
2748                 }
2749                 case OP_ATAN:
2750                         x86_fld1 (code);
2751                         x86_fpatan (code);
2752                         x86_fldz (code);
2753                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2754                         break;          
2755                 case OP_SQRT:
2756                         x86_fsqrt (code);
2757                         break;          
2758                 case OP_X86_FPOP:
2759                         x86_fstp (code, 0);
2760                         break;          
2761                 case OP_FREM: {
2762                         guint8 *l1, *l2;
2763
2764                         x86_push_reg (code, X86_EAX);
2765                         /* we need to exchange ST(0) with ST(1) */
2766                         x86_fxch (code, 1);
2767
2768                         /* this requires a loop, because fprem somtimes 
2769                          * returns a partial remainder */
2770                         l1 = code;
2771                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2772                         /* x86_fprem1 (code); */
2773                         x86_fprem (code);
2774                         x86_fnstsw (code);
2775                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2776                         l2 = code + 2;
2777                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2778
2779                         /* pop result */
2780                         x86_fstp (code, 1);
2781
2782                         x86_pop_reg (code, X86_EAX);
2783                         break;
2784                 }
2785                 case OP_FCOMPARE:
2786                         if (cfg->opt & MONO_OPT_FCMOV) {
2787                                 x86_fcomip (code, 1);
2788                                 x86_fstp (code, 0);
2789                                 break;
2790                         }
2791                         /* this overwrites EAX */
2792                         EMIT_FPCOMPARE(code);
2793                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2794                         break;
2795                 case OP_FCEQ:
2796                         if (cfg->opt & MONO_OPT_FCMOV) {
2797                                 /* zeroing the register at the start results in 
2798                                  * shorter and faster code (we can also remove the widening op)
2799                                  */
2800                                 guchar *unordered_check;
2801                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2802                                 x86_fcomip (code, 1);
2803                                 x86_fstp (code, 0);
2804                                 unordered_check = code;
2805                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2806                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2807                                 x86_patch (unordered_check, code);
2808                                 break;
2809                         }
2810                         if (ins->dreg != X86_EAX) 
2811                                 x86_push_reg (code, X86_EAX);
2812
2813                         EMIT_FPCOMPARE(code);
2814                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2815                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2816                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2817                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2818
2819                         if (ins->dreg != X86_EAX) 
2820                                 x86_pop_reg (code, X86_EAX);
2821                         break;
2822                 case OP_FCLT:
2823                 case OP_FCLT_UN:
2824                         if (cfg->opt & MONO_OPT_FCMOV) {
2825                                 /* zeroing the register at the start results in 
2826                                  * shorter and faster code (we can also remove the widening op)
2827                                  */
2828                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2829                                 x86_fcomip (code, 1);
2830                                 x86_fstp (code, 0);
2831                                 if (ins->opcode == OP_FCLT_UN) {
2832                                         guchar *unordered_check = code;
2833                                         guchar *jump_to_end;
2834                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2835                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2836                                         jump_to_end = code;
2837                                         x86_jump8 (code, 0);
2838                                         x86_patch (unordered_check, code);
2839                                         x86_inc_reg (code, ins->dreg);
2840                                         x86_patch (jump_to_end, code);
2841                                 } else {
2842                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2843                                 }
2844                                 break;
2845                         }
2846                         if (ins->dreg != X86_EAX) 
2847                                 x86_push_reg (code, X86_EAX);
2848
2849                         EMIT_FPCOMPARE(code);
2850                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2851                         if (ins->opcode == OP_FCLT_UN) {
2852                                 guchar *is_not_zero_check, *end_jump;
2853                                 is_not_zero_check = code;
2854                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2855                                 end_jump = code;
2856                                 x86_jump8 (code, 0);
2857                                 x86_patch (is_not_zero_check, code);
2858                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2859
2860                                 x86_patch (end_jump, code);
2861                         }
2862                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2863                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2864
2865                         if (ins->dreg != X86_EAX) 
2866                                 x86_pop_reg (code, X86_EAX);
2867                         break;
2868                 case OP_FCGT:
2869                 case OP_FCGT_UN:
2870                         if (cfg->opt & MONO_OPT_FCMOV) {
2871                                 /* zeroing the register at the start results in 
2872                                  * shorter and faster code (we can also remove the widening op)
2873                                  */
2874                                 guchar *unordered_check;
2875                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2876                                 x86_fcomip (code, 1);
2877                                 x86_fstp (code, 0);
2878                                 if (ins->opcode == OP_FCGT) {
2879                                         unordered_check = code;
2880                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2881                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2882                                         x86_patch (unordered_check, code);
2883                                 } else {
2884                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2885                                 }
2886                                 break;
2887                         }
2888                         if (ins->dreg != X86_EAX) 
2889                                 x86_push_reg (code, X86_EAX);
2890
2891                         EMIT_FPCOMPARE(code);
2892                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2893                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2894                         if (ins->opcode == OP_FCGT_UN) {
2895                                 guchar *is_not_zero_check, *end_jump;
2896                                 is_not_zero_check = code;
2897                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2898                                 end_jump = code;
2899                                 x86_jump8 (code, 0);
2900                                 x86_patch (is_not_zero_check, code);
2901                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2902         
2903                                 x86_patch (end_jump, code);
2904                         }
2905                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2906                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2907
2908                         if (ins->dreg != X86_EAX) 
2909                                 x86_pop_reg (code, X86_EAX);
2910                         break;
2911                 case OP_FBEQ:
2912                         if (cfg->opt & MONO_OPT_FCMOV) {
2913                                 guchar *jump = code;
2914                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2915                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2916                                 x86_patch (jump, code);
2917                                 break;
2918                         }
2919                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2920                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2921                         break;
2922                 case OP_FBNE_UN:
2923                         /* Branch if C013 != 100 */
2924                         if (cfg->opt & MONO_OPT_FCMOV) {
2925                                 /* branch if !ZF or (PF|CF) */
2926                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2927                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2928                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2929                                 break;
2930                         }
2931                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2932                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2933                         break;
2934                 case OP_FBLT:
2935                         if (cfg->opt & MONO_OPT_FCMOV) {
2936                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2937                                 break;
2938                         }
2939                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2940                         break;
2941                 case OP_FBLT_UN:
2942                         if (cfg->opt & MONO_OPT_FCMOV) {
2943                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2944                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2945                                 break;
2946                         }
2947                         if (ins->opcode == OP_FBLT_UN) {
2948                                 guchar *is_not_zero_check, *end_jump;
2949                                 is_not_zero_check = code;
2950                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2951                                 end_jump = code;
2952                                 x86_jump8 (code, 0);
2953                                 x86_patch (is_not_zero_check, code);
2954                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2955
2956                                 x86_patch (end_jump, code);
2957                         }
2958                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2959                         break;
2960                 case OP_FBGT:
2961                 case OP_FBGT_UN:
2962                         if (cfg->opt & MONO_OPT_FCMOV) {
2963                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2964                                 break;
2965                         }
2966                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2967                         if (ins->opcode == OP_FBGT_UN) {
2968                                 guchar *is_not_zero_check, *end_jump;
2969                                 is_not_zero_check = code;
2970                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2971                                 end_jump = code;
2972                                 x86_jump8 (code, 0);
2973                                 x86_patch (is_not_zero_check, code);
2974                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2975
2976                                 x86_patch (end_jump, code);
2977                         }
2978                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2979                         break;
2980                 case OP_FBGE:
2981                         /* Branch if C013 == 100 or 001 */
2982                         if (cfg->opt & MONO_OPT_FCMOV) {
2983                                 guchar *br1;
2984
2985                                 /* skip branch if C1=1 */
2986                                 br1 = code;
2987                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2988                                 /* branch if (C0 | C3) = 1 */
2989                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
2990                                 x86_patch (br1, code);
2991                                 break;
2992                         }
2993                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2994                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2995                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2996                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2997                         break;
2998                 case OP_FBGE_UN:
2999                         /* Branch if C013 == 000 */
3000                         if (cfg->opt & MONO_OPT_FCMOV) {
3001                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3002                                 break;
3003                         }
3004                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3005                         break;
3006                 case OP_FBLE:
3007                         /* Branch if C013=000 or 100 */
3008                         if (cfg->opt & MONO_OPT_FCMOV) {
3009                                 guchar *br1;
3010
3011                                 /* skip branch if C1=1 */
3012                                 br1 = code;
3013                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3014                                 /* branch if C0=0 */
3015                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3016                                 x86_patch (br1, code);
3017                                 break;
3018                         }
3019                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3020                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3021                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3022                         break;
3023                 case OP_FBLE_UN:
3024                         /* Branch if C013 != 001 */
3025                         if (cfg->opt & MONO_OPT_FCMOV) {
3026                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3027                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3028                                 break;
3029                         }
3030                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3031                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3032                         break;
3033                 case CEE_CKFINITE: {
3034                         x86_push_reg (code, X86_EAX);
3035                         x86_fxam (code);
3036                         x86_fnstsw (code);
3037                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3038                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3039                         x86_pop_reg (code, X86_EAX);
3040                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3041                         break;
3042                 }
3043                 case OP_TLS_GET: {
3044                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3045                         break;
3046                 }
3047                 case OP_MEMORY_BARRIER: {
3048                         /* Not needed on x86 */
3049                         break;
3050                 }
3051                 case OP_ATOMIC_ADD_I4: {
3052                         int dreg = ins->dreg;
3053
3054                         if (dreg == ins->inst_basereg) {
3055                                 x86_push_reg (code, ins->sreg2);
3056                                 dreg = ins->sreg2;
3057                         } 
3058                         
3059                         if (dreg != ins->sreg2)
3060                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3061
3062                         x86_prefix (code, X86_LOCK_PREFIX);
3063                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3064
3065                         if (dreg != ins->dreg) {
3066                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3067                                 x86_pop_reg (code, dreg);
3068                         }
3069
3070                         break;
3071                 }
3072                 case OP_ATOMIC_ADD_NEW_I4: {
3073                         int dreg = ins->dreg;
3074
3075                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3076                         if (ins->sreg2 == dreg) {
3077                                 if (dreg == X86_EBX) {
3078                                         dreg = X86_EDI;
3079                                         if (ins->inst_basereg == X86_EDI)
3080                                                 dreg = X86_ESI;
3081                                 } else {
3082                                         dreg = X86_EBX;
3083                                         if (ins->inst_basereg == X86_EBX)
3084                                                 dreg = X86_EDI;
3085                                 }
3086                         } else if (ins->inst_basereg == dreg) {
3087                                 if (dreg == X86_EBX) {
3088                                         dreg = X86_EDI;
3089                                         if (ins->sreg2 == X86_EDI)
3090                                                 dreg = X86_ESI;
3091                                 } else {
3092                                         dreg = X86_EBX;
3093                                         if (ins->sreg2 == X86_EBX)
3094                                                 dreg = X86_EDI;
3095                                 }
3096                         }
3097
3098                         if (dreg != ins->dreg) {
3099                                 x86_push_reg (code, dreg);
3100                         }
3101
3102                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3103                         x86_prefix (code, X86_LOCK_PREFIX);
3104                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3105                         /* dreg contains the old value, add with sreg2 value */
3106                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3107                         
3108                         if (ins->dreg != dreg) {
3109                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3110                                 x86_pop_reg (code, dreg);
3111                         }
3112
3113                         break;
3114                 }
3115                 case OP_ATOMIC_EXCHANGE_I4: {
3116                         guchar *br[2];
3117                         int sreg2 = ins->sreg2;
3118                         int breg = ins->inst_basereg;
3119
3120                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3121                          * hack to overcome limits in x86 reg allocator 
3122                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3123                          */
3124                         if (ins->dreg != X86_EAX)
3125                                 x86_push_reg (code, X86_EAX);
3126                         
3127                         /* We need the EAX reg for the cmpxchg */
3128                         if (ins->sreg2 == X86_EAX) {
3129                                 x86_push_reg (code, X86_EDX);
3130                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3131                                 sreg2 = X86_EDX;
3132                         }
3133
3134                         if (breg == X86_EAX) {
3135                                 x86_push_reg (code, X86_ESI);
3136                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3137                                 breg = X86_ESI;
3138                         }
3139
3140                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3141
3142                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3143                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3144                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3145                         x86_patch (br [1], br [0]);
3146
3147                         if (breg != ins->inst_basereg)
3148                                 x86_pop_reg (code, X86_ESI);
3149
3150                         if (ins->dreg != X86_EAX) {
3151                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3152                                 x86_pop_reg (code, X86_EAX);
3153                         }
3154
3155                         if (ins->sreg2 != sreg2)
3156                                 x86_pop_reg (code, X86_EDX);
3157
3158                         break;
3159                 }
3160                 default:
3161                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3162                         g_assert_not_reached ();
3163                 }
3164
3165                 if ((code - cfg->native_code - offset) > max_len) {
3166                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3167                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3168                         g_assert_not_reached ();
3169                 }
3170                
3171                 cpos += max_len;
3172
3173                 last_ins = ins;
3174                 last_offset = offset;
3175                 
3176                 ins = ins->next;
3177         }
3178
3179         cfg->code_len = code - cfg->native_code;
3180 }
3181
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Hook for registering architecture-specific low-level helper calls with
 * the runtime.  Intentionally empty on x86: this backend has nothing to
 * register here.
 */
void
mono_arch_register_lowlevel_calls (void)
{
}
3186
/*
 * mono_arch_patch_code:
 *
 *   Walk the jump-info list JI and fix up the native code starting at CODE
 * so that each recorded patch site references its resolved target address.
 * @method: the method whose code is being patched
 * @domain: the domain used to resolve patch targets
 * @code: start of the method's native code buffer
 * @ji: linked list of patch records (offset + patch type + data)
 * @run_cctors: when FALSE we are in AOT compilation mode, and only
 *   intra-method branch targets (basic blocks and labels) are patched here;
 *   the remaining patch types are skipped — presumably they are applied
 *   later, at load time (TODO confirm against the AOT loader).
 */
void
mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
{
	MonoJumpInfo *patch_info;
	gboolean compile_aot = !run_cctors;

	for (patch_info = ji; patch_info; patch_info = patch_info->next) {
		/* ip.i is a byte offset from the start of the native code */
		unsigned char *ip = patch_info->ip.i + code;
		const unsigned char *target;

		target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);

		if (compile_aot) {
			switch (patch_info->type) {
			case MONO_PATCH_INFO_BB:
			case MONO_PATCH_INFO_LABEL:
				break;
			default:
				/* No need to patch these */
				continue;
			}
		}

		switch (patch_info->type) {
		case MONO_PATCH_INFO_IP:
			/* Absolute pointer stored directly at the patch site */
			*((gconstpointer *)(ip)) = target;
			break;
		case MONO_PATCH_INFO_CLASS_INIT: {
			/* Local 'code' deliberately shadows the parameter:
			 * x86_call_code () advances it as a side effect while
			 * re-emitting the call opcode at ip before patching. */
			guint8 *code = ip;
			/* Might already been changed to a nop */
			x86_call_code (code, 0);
			x86_patch (ip, target);
			break;
		}
		case MONO_PATCH_INFO_ABS:
		case MONO_PATCH_INFO_METHOD:
		case MONO_PATCH_INFO_METHOD_JUMP:
		case MONO_PATCH_INFO_INTERNAL_METHOD:
		case MONO_PATCH_INFO_BB:
		case MONO_PATCH_INFO_LABEL:
			/* Call/branch instruction: rewrite its displacement */
			x86_patch (ip, target);
			break;
		case MONO_PATCH_INFO_NONE:
			break;
		default: {
			/* Generic case: the pointer-sized immediate lives at an
			 * instruction-dependent offset from ip */
			guint32 offset = mono_arch_get_patch_offset (ip);
			*((gconstpointer *)(ip + offset)) = target;
			break;
		}
		}
	}
}
3239
3240 guint8 *
3241 mono_arch_emit_prolog (MonoCompile *cfg)
3242 {
3243         MonoMethod *method = cfg->method;
3244         MonoBasicBlock *bb;
3245         MonoMethodSignature *sig;
3246         MonoInst *inst;
3247         int alloc_size, pos, max_offset, i;
3248         guint8 *code;
3249
3250         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3251         code = cfg->native_code = g_malloc (cfg->code_size);
3252
3253         x86_push_reg (code, X86_EBP);
3254         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3255
3256         alloc_size = cfg->stack_offset;
3257         pos = 0;
3258
3259         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3260                 /* Might need to attach the thread to the JIT */
3261                 if (lmf_tls_offset != -1) {
3262                         guint8 *buf;
3263
3264                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3265                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3266                         buf = code;
3267                         x86_branch8 (code, X86_CC_NE, 0, 0);
3268                         x86_push_imm (code, cfg->domain);
3269                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3270                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3271                         x86_patch (buf, code);
3272 #ifdef PLATFORM_WIN32
3273                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3274                         /* FIXME: Add a separate key for LMF to avoid this */
3275                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3276 #endif
3277                 } else {
3278                         g_assert (!cfg->compile_aot);
3279                         x86_push_imm (code, cfg->domain);
3280                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3281                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3282                 }
3283         }
3284
3285         if (method->save_lmf) {
3286                 pos += sizeof (MonoLMF);
3287
3288                 /* save the current IP */
3289                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3290                 x86_push_imm_template (code);
3291
3292                 /* save all caller saved regs */
3293                 x86_push_reg (code, X86_EBP);
3294                 x86_push_reg (code, X86_ESI);
3295                 x86_push_reg (code, X86_EDI);
3296                 x86_push_reg (code, X86_EBX);
3297
3298                 /* save method info */
3299                 x86_push_imm (code, method);
3300
3301                 /* get the address of lmf for the current thread */
3302                 /* 
3303                  * This is performance critical so we try to use some tricks to make
3304                  * it fast.
3305                  */
3306                 if (lmf_tls_offset != -1) {
3307                         /* Load lmf quicky using the GS register */
3308                         code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
3309 #ifdef PLATFORM_WIN32
3310                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3311                         /* FIXME: Add a separate key for LMF to avoid this */
3312                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3313 #endif
3314                 } else {
3315                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3316                 }
3317
3318                 /* push lmf */
3319                 x86_push_reg (code, X86_EAX); 
3320                 /* push *lfm (previous_lmf) */
3321                 x86_push_membase (code, X86_EAX, 0);
3322                 /* *(lmf) = ESP */
3323                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3324         } else {
3325
3326                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3327                         x86_push_reg (code, X86_EBX);
3328                         pos += 4;
3329                 }
3330
3331                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3332                         x86_push_reg (code, X86_EDI);
3333                         pos += 4;
3334                 }
3335
3336                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3337                         x86_push_reg (code, X86_ESI);
3338                         pos += 4;
3339                 }
3340         }
3341
3342         alloc_size -= pos;
3343
3344 #if __APPLE__
3345         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3346         {
3347                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3348                 if (tot & 4) {
3349                         tot += 4;
3350                         alloc_size += 4;
3351                 }
3352                 if (tot & 8) {
3353                         alloc_size += 8;
3354                 }
3355         }
3356 #endif
3357
3358         if (alloc_size) {
3359                 /* See mono_emit_stack_alloc */
3360 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3361                 guint32 remaining_size = alloc_size;
3362                 while (remaining_size >= 0x1000) {
3363                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3364                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3365                         remaining_size -= 0x1000;
3366                 }
3367                 if (remaining_size)
3368                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3369 #else
3370                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3371 #endif
3372         }
3373
3374 #if __APPLE_
3375         /* check the stack is aligned */
3376         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3377         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3378         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3379         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3380         x86_breakpoint (code);
3381 #endif
3382
3383         /* compute max_offset in order to use short forward jumps */
3384         max_offset = 0;
3385         if (cfg->opt & MONO_OPT_BRANCH) {
3386                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3387                         MonoInst *ins = bb->code;
3388                         bb->max_offset = max_offset;
3389
3390                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3391                                 max_offset += 6;
3392                         /* max alignment for loops */
3393                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3394                                 max_offset += LOOP_ALIGNMENT;
3395
3396                         while (ins) {
3397                                 if (ins->opcode == OP_LABEL)
3398                                         ins->inst_c1 = max_offset;
3399                                 
3400                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3401                                 ins = ins->next;
3402                         }
3403                 }
3404         }
3405
3406         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3407                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3408
3409         /* load arguments allocated to register from the stack */
3410         sig = mono_method_signature (method);
3411         pos = 0;
3412
3413         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3414                 inst = cfg->varinfo [pos];
3415                 if (inst->opcode == OP_REGVAR) {
3416                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3417                         if (cfg->verbose_level > 2)
3418                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3419                 }
3420                 pos++;
3421         }
3422
3423         cfg->code_len = code - cfg->native_code;
3424
3425         return code;
3426 }
3427
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the method epilog into cfg->native_code: unlink the LMF if the
 * prolog installed one, restore the callee saved registers, load
 * register-returned valuetypes into their registers, then emit leave/ret,
 * popping any callee-cleaned stack arguments.
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	/* Estimate an upper bound on the epilog size so the buffer can be grown up front */
	if (cfg->method->save_lmf)
		max_epilog_size += 128;
	
	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	/* Grow the native code buffer until the epilog is guaranteed to fit */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with CEE_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;
		gint32 lmf_offset = -sizeof (MonoLMF);

		/* Find a spare register */
		switch (sig->ret->type) {
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
			/* EDX carries part of a 64 bit return value, so use EDI instead */
			prev_lmf_reg = X86_EDI;
			cfg->used_int_regs |= (1 << X86_EDI);
			break;
		default:
			prev_lmf_reg = X86_EDX;
			break;
		}

		/* reg = previous_lmf */
		x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

		/* ecx = lmf->lmf_addr */
		x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);

		/* *(lmf_addr) = previous_lmf, i.e. pop this frame's LMF off the list */
		x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* Compute the (negative) EBP-relative offset of the register save area */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		/* Pop in the reverse of the prolog's push order (EBX, EDI, ESI) */
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	if (CALLCONV_IS_STDCALL (sig)) {
		/* Stdcall: the callee pops its own stack arguments */
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		/* Vtype returned through a hidden stack argument: the callee pops it */
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	g_free (cinfo);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3566
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out-of-line throw sequences that the method body branches to,
 * patching the forward branches recorded as MONO_PATCH_INFO_EXC entries.
 * Throw sequences are shared between multiple throw sites of the same
 * exception class; each site pushes its own IP offset first.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	/* Cache of up to 16 already-emitted throw sequences, keyed by class */
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Redirect the branch in the method body to the code emitted here */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse it: push this site's IP offset, jump to the shared code */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				size = 5 + 5;

				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					/* Reserve room for a 32 bit immediate, filled in below */
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				x86_push_imm (code, exc_class->type_token);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				x86_call_code (code, 0);
				/* Backpatch the first push with the now-known offset from the throw IP */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3672
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* not needed: x86 instruction caches are coherent with stores */
}
3678
void
mono_arch_flush_register_windows (void)
{
	/* x86 has no register windows, so this is a no-op */
}
3683
/*
 * Support for fast access to the thread-local lmf structure using the GS
 * segment register on NPTL + kernel 2.6.x.
 */

/* Set once the TLS offsets below have been looked up; see mono_arch_setup_jit_tls_data */
static gboolean tls_offset_inited = FALSE;
3690
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Look up the TLS offsets/keys used by the fast inline TLS access code
 * emitted by this backend. Setting MONO_NO_TLS in the environment disables
 * the fast paths (the offsets stay -1).
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
			/* 
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 */
			/* NOTE: tls_offset_inited is deliberately left FALSE on this path so we retry */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();
			thread_tls_offset = mono_thread_get_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
			if (thread_tls_offset >= 64)
				thread_tls_offset = -1;
#else
#if MONO_XEN_OPT
			/* Runtime Xen detection: /proc/xen exists when running under Xen */
			optimize_for_xen = access ("/proc/xen", F_OK) == 0;
#endif
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			thread_tls_offset = mono_thread_get_tls_offset ();
#endif
		}
	}
}
3724
void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
	/* Nothing is allocated by mono_arch_setup_jit_tls_data on x86, so nothing to free */
}
3729
3730 void
3731 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3732 {
3733         MonoCallInst *call = (MonoCallInst*)inst;
3734         CallInfo *cinfo = get_call_info (inst->signature, FALSE);
3735
3736         /* add the this argument */
3737         if (this_reg != -1) {
3738                 if (cinfo->args [0].storage == ArgInIReg) {
3739                         MonoInst *this;
3740                         MONO_INST_NEW (cfg, this, OP_MOVE);
3741                         this->type = this_type;
3742                         this->sreg1 = this_reg;
3743                         this->dreg = mono_regstate_next_int (cfg->rs);
3744                         mono_bblock_add_inst (cfg->cbb, this);
3745
3746                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
3747                 }
3748                 else {
3749                         MonoInst *this;
3750                         MONO_INST_NEW (cfg, this, OP_OUTARG);
3751                         this->type = this_type;
3752                         this->sreg1 = this_reg;
3753                         mono_bblock_add_inst (cfg->cbb, this);
3754                 }
3755         }
3756
3757         if (vt_reg != -1) {
3758                 MonoInst *vtarg;
3759
3760                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3761                         /*
3762                          * The valuetype is in EAX:EDX after the call, needs to be copied to
3763                          * the stack. Save the address here, so the call instruction can
3764                          * access it.
3765                          */
3766                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
3767                         vtarg->inst_destbasereg = X86_ESP;
3768                         vtarg->inst_offset = inst->stack_usage;
3769                         vtarg->sreg1 = vt_reg;
3770                         mono_bblock_add_inst (cfg->cbb, vtarg);
3771                 }
3772                 else if (cinfo->ret.storage == ArgInIReg) {
3773                         /* The return address is passed in a register */
3774                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
3775                         vtarg->sreg1 = vt_reg;
3776                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
3777                         mono_bblock_add_inst (cfg->cbb, vtarg);
3778
3779                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
3780                 } else {
3781                         MonoInst *vtarg;
3782                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3783                         vtarg->type = STACK_MP;
3784                         vtarg->sreg1 = vt_reg;
3785                         mono_bblock_add_inst (cfg->cbb, vtarg);
3786                 }
3787         }
3788
3789         g_free (cinfo);
3790 }
3791
3792 MonoInst*
3793 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3794 {
3795         MonoInst *ins = NULL;
3796
3797         if (cmethod->klass == mono_defaults.math_class) {
3798                 if (strcmp (cmethod->name, "Sin") == 0) {
3799                         MONO_INST_NEW (cfg, ins, OP_SIN);
3800                         ins->inst_i0 = args [0];
3801                 } else if (strcmp (cmethod->name, "Cos") == 0) {
3802                         MONO_INST_NEW (cfg, ins, OP_COS);
3803                         ins->inst_i0 = args [0];
3804                 } else if (strcmp (cmethod->name, "Tan") == 0) {
3805                         MONO_INST_NEW (cfg, ins, OP_TAN);
3806                         ins->inst_i0 = args [0];
3807                 } else if (strcmp (cmethod->name, "Atan") == 0) {
3808                         MONO_INST_NEW (cfg, ins, OP_ATAN);
3809                         ins->inst_i0 = args [0];
3810                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
3811                         MONO_INST_NEW (cfg, ins, OP_SQRT);
3812                         ins->inst_i0 = args [0];
3813                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
3814                         MONO_INST_NEW (cfg, ins, OP_ABS);
3815                         ins->inst_i0 = args [0];
3816                 }
3817 #if 0
3818                 /* OP_FREM is not IEEE compatible */
3819                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
3820                         MONO_INST_NEW (cfg, ins, OP_FREM);
3821                         ins->inst_i0 = args [0];
3822                         ins->inst_i1 = args [1];
3823                 }
3824 #endif
3825         } else if (cmethod->klass == mono_defaults.thread_class &&
3826                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
3827                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
3828         } else if(cmethod->klass->image == mono_defaults.corlib &&
3829                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
3830                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
3831
3832                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3833                         MonoInst *ins_iconst;
3834
3835                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3836                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3837                         ins_iconst->inst_c0 = 1;
3838
3839                         ins->inst_i0 = args [0];
3840                         ins->inst_i1 = ins_iconst;
3841                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3842                         MonoInst *ins_iconst;
3843
3844                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3845                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3846                         ins_iconst->inst_c0 = -1;
3847
3848                         ins->inst_i0 = args [0];
3849                         ins->inst_i1 = ins_iconst;
3850                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3851                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
3852
3853                         ins->inst_i0 = args [0];
3854                         ins->inst_i1 = args [1];
3855                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3856                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3857
3858                         ins->inst_i0 = args [0];
3859                         ins->inst_i1 = args [1];
3860                 }
3861         }
3862
3863         return ins;
3864 }
3865
3866
3867 gboolean
3868 mono_arch_print_tree (MonoInst *tree, int arity)
3869 {
3870         return 0;
3871 }
3872
3873 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3874 {
3875         MonoInst* ins;
3876         
3877         if (appdomain_tls_offset == -1)
3878                 return NULL;
3879
3880         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3881         ins->inst_offset = appdomain_tls_offset;
3882         return ins;
3883 }
3884
3885 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3886 {
3887         MonoInst* ins;
3888
3889         if (thread_tls_offset == -1)
3890                 return NULL;
3891
3892         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3893         ins->inst_offset = thread_tls_offset;
3894         return ins;
3895 }
3896
3897 guint32
3898 mono_arch_get_patch_offset (guint8 *code)
3899 {
3900         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3901                 return 2;
3902         else if ((code [0] == 0xba))
3903                 return 1;
3904         else if ((code [0] == 0x68))
3905                 /* push IMM */
3906                 return 1;
3907         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3908                 /* push <OFFSET>(<REG>) */
3909                 return 2;
3910         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
3911                 /* call *<OFFSET>(<REG>) */
3912                 return 2;
3913         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
3914                 /* fldl <ADDR> */
3915                 return 2;
3916         else if ((code [0] == 0x58) && (code [1] == 0x05))
3917                 /* pop %eax; add <OFFSET>, %eax */
3918                 return 2;
3919         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
3920                 /* pop <REG>; add <OFFSET>, <REG> */
3921                 return 3;
3922         else {
3923                 g_assert_not_reached ();
3924                 return -1;
3925         }
3926 }
3927
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   Decode backwards the call instruction ending at CODE (presumably the
 * return address of the call — verify against callers) and return the
 * address of the memory slot the call target was loaded from, using the
 * saved register values in REGS. Returns NULL when the call was not made
 * through a memory slot (e.g. a direct call).
 */
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;

	/* 
	 * A given byte sequence can match more than one case here, so we have to be
	 * really careful about the ordering of the cases. Longer sequences
	 * come first.
	 */
	if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
		/*
		 * This is an interface call
		 * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
		 * ff 10                   call   *(%eax)
		 */
		reg = x86_modrm_rm (code [5]);
		disp = 0;
	} else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *<disp8>(<REG>) — mod=1, 8 bit signed displacement */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *<disp32>(<REG>) — mod=2, 32 bit displacement */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* Direct call — no vtable slot involved */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is an interface call
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	return (gpointer*)(((gint32)(regs [reg])) + disp);
}
3980
3981 gpointer* 
3982 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
3983 {
3984         guint8 reg = 0;
3985         gint32 disp = 0;
3986
3987         code -= 7;
3988         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
3989                 reg = x86_modrm_rm (code [1]);
3990                 disp = code [4];
3991
3992                 if (reg == X86_EAX)
3993                         return NULL;
3994                 else
3995                         return (gpointer*)(((gint32)(regs [reg])) + disp);
3996         }
3997
3998         return NULL;
3999 }