2007-01-31 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #include <unistd.h>
15
16 #include <mono/metadata/appdomain.h>
17 #include <mono/metadata/debug-helpers.h>
18 #include <mono/metadata/threads.h>
19 #include <mono/metadata/profiler-private.h>
20 #include <mono/utils/mono-math.h>
21
22 #include "trace.h"
23 #include "mini-x86.h"
24 #include "inssel.h"
25 #include "cpu-x86.h"
26
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
/* NOTE(review): presumably set to FALSE at runtime when not running under
 * Xen, so generated code can avoid Xen-unfriendly patterns — confirm against
 * the code that writes this flag. */
static gboolean optimize_for_xen = TRUE;
#else
/* Non-Xen builds: constant 0 lets the compiler drop the checks entirely */
#define optimize_for_xen 0
#endif

#ifdef PLATFORM_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif
44
/* Round VAL up to the next multiple of ALIGN (ALIGN must be a power of two) */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

/* Offset from the frame base to the first incoming argument
 * (saved frame pointer + return address = 8 bytes) */
#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

#define NOT_IMPLEMENTED g_assert_not_reached ()
57
58 const char*
59 mono_arch_regname (int reg) {
60         switch (reg) {
61         case X86_EAX: return "%eax";
62         case X86_EBX: return "%ebx";
63         case X86_ECX: return "%ecx";
64         case X86_EDX: return "%edx";
65         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
66         case X86_EDI: return "%edi";
67         case X86_ESI: return "%esi";
68         }
69         return "unknown";
70 }
71
const char*
mono_arch_fregname (int reg) {
	/* This backend does not name fp registers individually */
	return "unknown";
}
76
/* Where/how a single argument or return value is passed */
typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFloatFpStack,
	ArgOnDoubleFpStack,
	ArgNone
} ArgStorage;

/* Location of one argument or return value */
typedef struct {
	gint16 offset;		/* stack offset, valid when storage == ArgOnStack */
	gint8  reg;		/* register number, valid for the in-register storages */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

/* Full description of a call's argument/return layout (see get_call_info) */
typedef struct {
	int nargs;
	guint32 stack_usage;		/* total stack space consumed by arguments */
	guint32 reg_usage;		/* integer registers used */
	guint32 freg_usage;		/* float registers used */
	gboolean need_stack_align;	/* whether padding was added for 16-byte alignment */
	guint32 stack_align_amount;	/* size of that padding */
	ArgInfo ret;
	ArgInfo sig_cookie;		/* vararg signature cookie location */
	ArgInfo args [1];		/* variable length: one entry per argument */
} CallInfo;
109
/* No integer arguments are passed in registers on x86: everything goes on the stack */
#define PARAM_REGS 0

/* Likewise, no fp arguments are passed in registers */
#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
/* On these platforms small structs are returned in EAX:EDX (see add_valuetype) */
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
120
121 static void inline
122 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
123 {
124     ainfo->offset = *stack_size;
125
126     if (*gr >= PARAM_REGS) {
127                 ainfo->storage = ArgOnStack;
128                 (*stack_size) += sizeof (gpointer);
129     }
130     else {
131                 ainfo->storage = ArgInIReg;
132                 ainfo->reg = param_regs [*gr];
133                 (*gr) ++;
134     }
135 }
136
137 static void inline
138 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
139 {
140         ainfo->offset = *stack_size;
141
142         g_assert (PARAM_REGS == 0);
143         
144         ainfo->storage = ArgOnStack;
145         (*stack_size) += sizeof (gpointer) * 2;
146 }
147
148 static void inline
149 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
150 {
151     ainfo->offset = *stack_size;
152
153     if (*gr >= FLOAT_PARAM_REGS) {
154                 ainfo->storage = ArgOnStack;
155                 (*stack_size) += is_double ? 8 : 4;
156     }
157     else {
158                 /* A double register */
159                 if (is_double)
160                         ainfo->storage = ArgInDoubleSSEReg;
161                 else
162                         ainfo->storage = ArgInFloatSSEReg;
163                 ainfo->reg = *gr;
164                 (*gr) += 1;
165     }
166 }
167
168
/*
 * Assign a valuetype argument or return value. On the platforms that define
 * SMALL_STRUCTS_IN_REGS, small pinvoke struct returns are placed in
 * EAX/EDX or on the fp stack; everything else goes on the stack.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* native vs managed layout can differ, so pick the right size */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef SMALL_STRUCTS_IN_REGS
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* 1/2/4 byte structs fit in EAX; 8 byte structs use EAX:EDX */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* default: passed on the stack, padded up to pointer size */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
224
225 /*
226  * get_call_info:
227  *
228  *  Obtain information about a call according to the calling convention.
229  * For x86 ELF, see the "System V Application Binary Interface Intel386 
230  * Architecture Processor Supplment, Fourth Edition" document for more
231  * information.
232  * For x86 win32, see ???.
233  */
234 static CallInfo*
235 get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
236 {
237         guint32 i, gr, fr;
238         MonoType *ret_type;
239         int n = sig->hasthis + sig->param_count;
240         guint32 stack_size = 0;
241         CallInfo *cinfo;
242
243         cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
244
245         gr = 0;
246         fr = 0;
247
248         /* return value */
249         {
250                 ret_type = mono_type_get_underlying_type (sig->ret);
251                 switch (ret_type->type) {
252                 case MONO_TYPE_BOOLEAN:
253                 case MONO_TYPE_I1:
254                 case MONO_TYPE_U1:
255                 case MONO_TYPE_I2:
256                 case MONO_TYPE_U2:
257                 case MONO_TYPE_CHAR:
258                 case MONO_TYPE_I4:
259                 case MONO_TYPE_U4:
260                 case MONO_TYPE_I:
261                 case MONO_TYPE_U:
262                 case MONO_TYPE_PTR:
263                 case MONO_TYPE_FNPTR:
264                 case MONO_TYPE_CLASS:
265                 case MONO_TYPE_OBJECT:
266                 case MONO_TYPE_SZARRAY:
267                 case MONO_TYPE_ARRAY:
268                 case MONO_TYPE_STRING:
269                         cinfo->ret.storage = ArgInIReg;
270                         cinfo->ret.reg = X86_EAX;
271                         break;
272                 case MONO_TYPE_U8:
273                 case MONO_TYPE_I8:
274                         cinfo->ret.storage = ArgInIReg;
275                         cinfo->ret.reg = X86_EAX;
276                         break;
277                 case MONO_TYPE_R4:
278                         cinfo->ret.storage = ArgOnFloatFpStack;
279                         break;
280                 case MONO_TYPE_R8:
281                         cinfo->ret.storage = ArgOnDoubleFpStack;
282                         break;
283                 case MONO_TYPE_GENERICINST:
284                         if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
285                                 cinfo->ret.storage = ArgInIReg;
286                                 cinfo->ret.reg = X86_EAX;
287                                 break;
288                         }
289                         /* Fall through */
290                 case MONO_TYPE_VALUETYPE: {
291                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
292
293                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
294                         if (cinfo->ret.storage == ArgOnStack)
295                                 /* The caller passes the address where the value is stored */
296                                 add_general (&gr, &stack_size, &cinfo->ret);
297                         break;
298                 }
299                 case MONO_TYPE_TYPEDBYREF:
300                         /* Same as a valuetype with size 24 */
301                         add_general (&gr, &stack_size, &cinfo->ret);
302                         ;
303                         break;
304                 case MONO_TYPE_VOID:
305                         cinfo->ret.storage = ArgNone;
306                         break;
307                 default:
308                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
309                 }
310         }
311
312         /* this */
313         if (sig->hasthis)
314                 add_general (&gr, &stack_size, cinfo->args + 0);
315
316         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
317                 gr = PARAM_REGS;
318                 fr = FLOAT_PARAM_REGS;
319                 
320                 /* Emit the signature cookie just before the implicit arguments */
321                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
322         }
323
324         for (i = 0; i < sig->param_count; ++i) {
325                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
326                 MonoType *ptype;
327
328                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
329                         /* We allways pass the sig cookie on the stack for simplicity */
330                         /* 
331                          * Prevent implicit arguments + the sig cookie from being passed 
332                          * in registers.
333                          */
334                         gr = PARAM_REGS;
335                         fr = FLOAT_PARAM_REGS;
336
337                         /* Emit the signature cookie just before the implicit arguments */
338                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
339                 }
340
341                 if (sig->params [i]->byref) {
342                         add_general (&gr, &stack_size, ainfo);
343                         continue;
344                 }
345                 ptype = mono_type_get_underlying_type (sig->params [i]);
346                 switch (ptype->type) {
347                 case MONO_TYPE_BOOLEAN:
348                 case MONO_TYPE_I1:
349                 case MONO_TYPE_U1:
350                         add_general (&gr, &stack_size, ainfo);
351                         break;
352                 case MONO_TYPE_I2:
353                 case MONO_TYPE_U2:
354                 case MONO_TYPE_CHAR:
355                         add_general (&gr, &stack_size, ainfo);
356                         break;
357                 case MONO_TYPE_I4:
358                 case MONO_TYPE_U4:
359                         add_general (&gr, &stack_size, ainfo);
360                         break;
361                 case MONO_TYPE_I:
362                 case MONO_TYPE_U:
363                 case MONO_TYPE_PTR:
364                 case MONO_TYPE_FNPTR:
365                 case MONO_TYPE_CLASS:
366                 case MONO_TYPE_OBJECT:
367                 case MONO_TYPE_STRING:
368                 case MONO_TYPE_SZARRAY:
369                 case MONO_TYPE_ARRAY:
370                         add_general (&gr, &stack_size, ainfo);
371                         break;
372                 case MONO_TYPE_GENERICINST:
373                         if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
374                                 add_general (&gr, &stack_size, ainfo);
375                                 break;
376                         }
377                         /* Fall through */
378                 case MONO_TYPE_VALUETYPE:
379                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
380                         break;
381                 case MONO_TYPE_TYPEDBYREF:
382                         stack_size += sizeof (MonoTypedRef);
383                         ainfo->storage = ArgOnStack;
384                         break;
385                 case MONO_TYPE_U8:
386                 case MONO_TYPE_I8:
387                         add_general_pair (&gr, &stack_size, ainfo);
388                         break;
389                 case MONO_TYPE_R4:
390                         add_float (&fr, &stack_size, ainfo, FALSE);
391                         break;
392                 case MONO_TYPE_R8:
393                         add_float (&fr, &stack_size, ainfo, TRUE);
394                         break;
395                 default:
396                         g_error ("unexpected type 0x%x", ptype->type);
397                         g_assert_not_reached ();
398                 }
399         }
400
401         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
402                 gr = PARAM_REGS;
403                 fr = FLOAT_PARAM_REGS;
404                 
405                 /* Emit the signature cookie just before the implicit arguments */
406                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
407         }
408
409 #if defined(__APPLE__)
410         if ((stack_size % 16) != 0) { 
411                 cinfo->need_stack_align = TRUE;
412                 stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
413         }
414 #endif
415
416         cinfo->stack_usage = stack_size;
417         cinfo->reg_usage = gr;
418         cinfo->freg_usage = fr;
419         return cinfo;
420 }
421
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries. 
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;		/* == ARGS_OFFSET: saved frame pointer + return address */
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	/* vtype returned through a stack-passed address: account for that slot */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* entry 0 covers the implicit arguments (this / hidden return address) */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else {
			int ialign;
			size = mono_type_stack_size (csig->params [k], &ialign);
			align = ialign;
		}

		/* ignore alignment for now */
		align = 1;

		/* pad of entry k is the padding inserted before parameter k */
		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);	
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* trailing padding rounds the frame up to the required alignment */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
489
/*
 * Machine code for a small cdecl routine matching the CpuidFunc typedef:
 * it executes CPUID with the requested function id and stores the four
 * result registers through the supplied pointers. Copied into executable
 * memory at runtime (see cpuid () below for why).
 */
static const guchar cpuid_impl [] = {
	0x55,				/* push   %ebp */
	0x89, 0xe5,			/* mov    %esp,%ebp */
	0x53,				/* push   %ebx */
	0x8b, 0x45, 0x08,		/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,			/* cpuid   */
	0x50,				/* push   %eax */
	0x8b, 0x45, 0x10,		/* mov    0x10(%ebp),%eax */
	0x89, 0x18,			/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,		/* mov    0x14(%ebp),%eax */
	0x89, 0x08,			/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,		/* mov    0x18(%ebp),%eax */
	0x89, 0x10,			/* mov    %edx,(%eax) */
	0x58,				/* pop    %eax */
	0x8b, 0x55, 0x0c,		/* mov    0xc(%ebp),%edx */
	0x89, 0x02,			/* mov    %eax,(%edx) */
	0x5b,				/* pop    %ebx */
	0xc9,				/* leave   */
	0xc3,				/* ret     */
};
510
/* Signature of the runtime-assembled cpuid_impl routine above */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);

/*
 * cpuid:
 *
 *   Execute the CPUID instruction with function ID, storing the results
 * through P_EAX/P_EBX/P_ECX/P_EDX. Returns 1 on success, 0 if the cpu
 * does not support CPUID at all.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	/* CPUID support test: flip the ID bit (0x200000) in EFLAGS; if the
	 * change sticks, the cpu implements CPUID. */
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	/* Same EFLAGS ID-bit test in MSVC inline-asm syntax */
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		static CpuidFunc func = NULL;
		void *ptr;
		if (!func) {
			ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
			memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
			func = (CpuidFunc)ptr;
		}
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
571
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	/* read the x87 control word, force 53-bit (double) precision, write back */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	/* MSVC: set 53-bit precision through the CRT instead of inline asm */
	_control87 (_PC_53, MCW_PC);
#endif
}
591
592 /*
593  * This function returns the optimizations supported on this cpu.
594  */
595 guint32
596 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
597 {
598         int eax, ebx, ecx, edx;
599         guint32 opts = 0;
600         
601         *exclude_mask = 0;
602         /* Feature Flags function, flags returned in EDX. */
603         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
604                 if (edx & (1 << 15)) {
605                         opts |= MONO_OPT_CMOV;
606                         if (edx & 1)
607                                 opts |= MONO_OPT_FCMOV;
608                         else
609                                 *exclude_mask |= MONO_OPT_FCMOV;
610                 } else
611                         *exclude_mask |= MONO_OPT_CMOV;
612         }
613         return opts;
614 }
615
/*
 * Determine whenever the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* opcode 0xf7 with mod == 3 and reg field == 7 is "idiv <reg>" */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG: read the divisor's value out of the trap context */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			/* esp/ebp — presumably never emitted as a divisor by the JIT */
			g_assert_not_reached ();
			reg = -1;	/* not reached; keeps the compiler quiet */
		}

		/* A faulting idiv with divisor -1 means INT_MIN / -1, i.e. an
		 * arithmetic overflow rather than a division by zero */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
664
665 GList *
666 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
667 {
668         GList *vars = NULL;
669         int i;
670
671         for (i = 0; i < cfg->num_varinfo; i++) {
672                 MonoInst *ins = cfg->varinfo [i];
673                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
674
675                 /* unused vars */
676                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
677                         continue;
678
679                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
680                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
681                         continue;
682
683                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
684                  * 8bit quantities in caller saved registers on x86 */
685                 if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
686                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
687                         g_assert (i == vmv->idx);
688                         vars = g_list_prepend (vars, vmv);
689                 }
690         }
691
692         vars = mono_varlist_sort (cfg, vars, 0);
693
694         return vars;
695 }
696
697 GList *
698 mono_arch_get_global_int_regs (MonoCompile *cfg)
699 {
700         GList *regs = NULL;
701
702         /* we can use 3 registers for global allocation */
703         regs = g_list_prepend (regs, (gpointer)X86_EBX);
704         regs = g_list_prepend (regs, (gpointer)X86_ESI);
705         regs = g_list_prepend (regs, (gpointer)X86_EDI);
706
707         return regs;
708 }
709
710 /*
711  * mono_arch_regalloc_cost:
712  *
713  *  Return the cost, in number of memory references, of the action of 
714  * allocating the variable VMV into a register during global register
715  * allocation.
716  */
717 guint32
718 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
719 {
720         MonoInst *ins = cfg->varinfo [vmv->idx];
721
722         if (cfg->method->save_lmf)
723                 /* The register is already saved */
724                 return (ins->opcode == OP_ARG) ? 1 : 0;
725         else
726                 /* push+pop+possible load if it is an argument */
727                 return (ins->opcode == OP_ARG) ? 3 : 2;
728 }
729  
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (sig, FALSE);

	cfg->frame_reg = MONO_ARCH_BASEREG;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		/* 4 bytes per callee-saved register actually used by the method */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* round the running offset up to the locals' alignment */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	/* locals live below the frame pointer, hence the negated offsets */
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* the return address slot the caller passed on the stack */
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	/* arguments live above the frame pointer at ARGS_OFFSET + their
	 * calling-convention offset */
	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->varinfo [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	/* round the whole frame up to the required alignment */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	cfg->stack_offset = offset;

	g_free (cinfo);
}
847
848 void
849 mono_arch_create_vars (MonoCompile *cfg)
850 {
851         MonoMethodSignature *sig;
852         CallInfo *cinfo;
853
854         sig = mono_method_signature (cfg->method);
855
856         cinfo = get_call_info (sig, FALSE);
857
858         if (cinfo->ret.storage == ArgValuetypeInReg)
859                 cfg->ret_var_is_local = TRUE;
860
861         g_free (cinfo);
862 }
863
/* FIXME: we need an alignment solution for enter_method () and mono_arch_call_opcode ();
 * currently the alignment in mono_arch_call_opcode () is computed without using
 * arch_get_argument_info ().
 */
867
/*
 * emit_sig_cookie:
 *
 *   Prepend an OP_OUTARG instruction to CALL->out_args which pushes the
 * vararg signature cookie (a MonoMethodSignature* describing only the
 * arguments after the sentinel).  Ownership of the duplicated signature
 * passes to the OP_ICONST instruction holding it.
 */
static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg;
	MonoMethodSignature *tmp_sig;
	MonoInst *sig_arg;

	/* FIXME: Add support for signature tokens to AOT */
	cfg->disable_aot = TRUE;
	MONO_INST_NEW (cfg, arg, OP_OUTARG);

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is 
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by 
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup (call->signature);
	/* Keep only the arguments after the sentinel position */
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	/* Wrap the signature pointer in an ICONST so it can be pushed */
	MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
	sig_arg->inst_p0 = tmp_sig;

	arg->inst_left = sig_arg;
	arg->type = STACK_PTR;
	/* prepend, so they get reversed */
	arg->next = call->out_args;
	call->out_args = arg;
}
899
900 /* 
901  * take the arguments and generate the arch-specific
902  * instructions to properly call the function in call.
903  * This includes pushing, moving arguments to the right register
904  * etc.
905  */
906 MonoCallInst*
907 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
908         MonoInst *arg, *in;
909         MonoMethodSignature *sig;
910         int i, n;
911         CallInfo *cinfo;
912         int sentinelpos = 0;
913
914         sig = call->signature;
915         n = sig->param_count + sig->hasthis;
916
917         cinfo = get_call_info (sig, FALSE);
918
919         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
920                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
921
922         for (i = 0; i < n; ++i) {
923                 ArgInfo *ainfo = cinfo->args + i;
924
925                 /* Emit the signature cookie just before the implicit arguments */
926                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
927                         emit_sig_cookie (cfg, call);
928                 }
929
930                 if (is_virtual && i == 0) {
931                         /* the argument will be attached to the call instrucion */
932                         in = call->args [i];
933                 } else {
934                         MonoType *t;
935
936                         if (i >= sig->hasthis)
937                                 t = sig->params [i - sig->hasthis];
938                         else
939                                 t = &mono_defaults.int_class->byval_arg;
940                         t = mono_type_get_underlying_type (t);
941
942                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
943                         in = call->args [i];
944                         arg->cil_code = in->cil_code;
945                         arg->inst_left = in;
946                         arg->type = in->type;
947                         /* prepend, so they get reversed */
948                         arg->next = call->out_args;
949                         call->out_args = arg;
950
951                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
952                                 guint32 size, align;
953
954                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
955                                         size = sizeof (MonoTypedRef);
956                                         align = sizeof (gpointer);
957                                 }
958                                 else
959                                         if (sig->pinvoke)
960                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
961                                         else {
962                                                 int ialign;
963                                                 size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
964                                                 align = ialign;
965                                         }
966                                 arg->opcode = OP_OUTARG_VT;
967                                 arg->klass = in->klass;
968                                 arg->backend.is_pinvoke = sig->pinvoke;
969                                 arg->inst_imm = size; 
970                         }
971                         else {
972                                 switch (ainfo->storage) {
973                                 case ArgOnStack:
974                                         arg->opcode = OP_OUTARG;
975                                         if (!t->byref) {
976                                                 if (t->type == MONO_TYPE_R4)
977                                                         arg->opcode = OP_OUTARG_R4;
978                                                 else
979                                                         if (t->type == MONO_TYPE_R8)
980                                                                 arg->opcode = OP_OUTARG_R8;
981                                         }
982                                         break;
983                                 default:
984                                         g_assert_not_reached ();
985                                 }
986                         }
987                 }
988         }
989
990         /* Handle the case where there are no implicit arguments */
991         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
992                 emit_sig_cookie (cfg, call);
993         }
994
995         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
996                 if (cinfo->ret.storage == ArgValuetypeInReg) {
997                         MonoInst *zero_inst;
998                         /*
999                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1000                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1001                          * before calling the function. So we add a dummy instruction to represent pushing the 
1002                          * struct return address to the stack. The return address will be saved to this stack slot 
1003                          * by the code emitted in this_vret_args.
1004                          */
1005                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1006                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1007                         zero_inst->inst_p0 = 0;
1008                         arg->inst_left = zero_inst;
1009                         arg->type = STACK_PTR;
1010                         /* prepend, so they get reversed */
1011                         arg->next = call->out_args;
1012                         call->out_args = arg;
1013                 }
1014                 else
1015                         /* if the function returns a struct, the called method already does a ret $0x4 */
1016                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1017                                 cinfo->stack_usage -= 4;
1018         }
1019         
1020         call->stack_usage = cinfo->stack_usage;
1021
1022 #if defined(__APPLE__)
1023         if (cinfo->need_stack_align) {
1024                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1025                 arg->inst_c0 = cinfo->stack_align_amount;
1026                 arg->next = call->out_args;
1027                 call->out_args = arg;
1028         }
1029 #endif 
1030
1031         g_free (cinfo);
1032
1033         return call;
1034 }
1035
1036 /*
1037  * Allow tracing to work with this interface (with an optional argument)
1038  */
1039 void*
1040 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1041 {
1042         guchar *code = p;
1043
1044 #if __APPLE__
1045         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1046 #endif
1047
1048         /* if some args are passed in registers, we need to save them here */
1049         x86_push_reg (code, X86_EBP);
1050
1051         if (cfg->compile_aot) {
1052                 x86_push_imm (code, cfg->method);
1053                 x86_mov_reg_imm (code, X86_EAX, func);
1054                 x86_call_reg (code, X86_EAX);
1055         } else {
1056                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1057                 x86_push_imm (code, cfg->method);
1058                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1059                 x86_call_code (code, 0);
1060         }
1061 #if __APPLE__
1062         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
1063 #else
1064         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1065 #endif
1066
1067         return code;
1068 }
1069
/* Return-value save strategies used by mono_arch_instrument_epilog () */
enum {
	SAVE_NONE,	/* nothing to preserve across the trace call */
	SAVE_STRUCT,	/* valuetype returned through a hidden pointer arg */
	SAVE_EAX,	/* 32 bit integer/pointer result in EAX */
	SAVE_EAX_EDX,	/* 64 bit result in the EAX:EDX pair */
	SAVE_FP		/* floating point result on the x87 stack */
};
1077
1078 void*
1079 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1080 {
1081         guchar *code = p;
1082         int arg_size = 0, save_mode = SAVE_NONE;
1083         MonoMethod *method = cfg->method;
1084         
1085         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1086         case MONO_TYPE_VOID:
1087                 /* special case string .ctor icall */
1088                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1089                         save_mode = SAVE_EAX;
1090                 else
1091                         save_mode = SAVE_NONE;
1092                 break;
1093         case MONO_TYPE_I8:
1094         case MONO_TYPE_U8:
1095                 save_mode = SAVE_EAX_EDX;
1096                 break;
1097         case MONO_TYPE_R4:
1098         case MONO_TYPE_R8:
1099                 save_mode = SAVE_FP;
1100                 break;
1101         case MONO_TYPE_GENERICINST:
1102                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1103                         save_mode = SAVE_EAX;
1104                         break;
1105                 }
1106                 /* Fall through */
1107         case MONO_TYPE_VALUETYPE:
1108                 save_mode = SAVE_STRUCT;
1109                 break;
1110         default:
1111                 save_mode = SAVE_EAX;
1112                 break;
1113         }
1114
1115         switch (save_mode) {
1116         case SAVE_EAX_EDX:
1117                 x86_push_reg (code, X86_EDX);
1118                 x86_push_reg (code, X86_EAX);
1119                 if (enable_arguments) {
1120                         x86_push_reg (code, X86_EDX);
1121                         x86_push_reg (code, X86_EAX);
1122                         arg_size = 8;
1123                 }
1124                 break;
1125         case SAVE_EAX:
1126                 x86_push_reg (code, X86_EAX);
1127                 if (enable_arguments) {
1128                         x86_push_reg (code, X86_EAX);
1129                         arg_size = 4;
1130                 }
1131                 break;
1132         case SAVE_FP:
1133                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1134                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1135                 if (enable_arguments) {
1136                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1137                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1138                         arg_size = 8;
1139                 }
1140                 break;
1141         case SAVE_STRUCT:
1142                 if (enable_arguments) {
1143                         x86_push_membase (code, X86_EBP, 8);
1144                         arg_size = 4;
1145                 }
1146                 break;
1147         case SAVE_NONE:
1148         default:
1149                 break;
1150         }
1151
1152         if (cfg->compile_aot) {
1153                 x86_push_imm (code, method);
1154                 x86_mov_reg_imm (code, X86_EAX, func);
1155                 x86_call_reg (code, X86_EAX);
1156         } else {
1157                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1158                 x86_push_imm (code, method);
1159                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1160                 x86_call_code (code, 0);
1161         }
1162         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1163
1164         switch (save_mode) {
1165         case SAVE_EAX_EDX:
1166                 x86_pop_reg (code, X86_EAX);
1167                 x86_pop_reg (code, X86_EDX);
1168                 break;
1169         case SAVE_EAX:
1170                 x86_pop_reg (code, X86_EAX);
1171                 break;
1172         case SAVE_FP:
1173                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1174                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1175                 break;
1176         case SAVE_NONE:
1177         default:
1178                 break;
1179         }
1180
1181         return code;
1182 }
1183
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch with condition COND (signedness SIGN) for
 * INS.  If the target — a label (MONO_INST_BRLABEL) or a basic block —
 * already has a native offset, branch to it directly; otherwise record a
 * patch and emit a branch with a zero displacement to be fixed up later.
 * With MONO_OPT_BRANCH enabled, the short (8 bit) form is used when the
 * estimated displacement fits in an imm8.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1208
1209 /*  
1210  *      Emit an exception if condition is fail and
1211  *  if possible do a directly branch to target 
1212  */
1213 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
1214         do {                                                        \
1215                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
1216                 if (tins == NULL) {                                                                             \
1217                         mono_add_patch_info (cfg, code - cfg->native_code,   \
1218                                         MONO_PATCH_INFO_EXC, exc_name);  \
1219                         x86_branch32 (code, cond, 0, signed);               \
1220                 } else {        \
1221                         EMIT_COND_BRANCH (tins, cond, signed);  \
1222                 }                       \
1223         } while (0); 
1224
/*
 * EMIT_FPCOMPARE:
 *
 *   Compare the two values on top of the x87 register stack, popping
 * both (fcompp), then store the FPU status word into AX (fnstsw) so the
 * result can be tested with integer instructions.
 * NOTE: like EMIT_COND_SYSTEM_EXCEPTION, the expansion ends with ';'.
 */
#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0); 
1229
1230
1231 static guint8*
1232 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1233 {
1234         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1235         x86_call_code (code, 0);
1236
1237         return code;
1238 }
1239
/* FIXME: Add more instructions */
/* True for opcodes which do not depend on the condition flags, so an
 * instruction that clobbers the flags (e.g. XOR) can safely be placed
 * immediately before them. */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1242
/*
 * peephole_pass:
 *
 *   Perform simple peephole optimizations on the instruction list of BB:
 * turn "mov reg, 0" into "xor reg, reg" when clobbering the flags is
 * harmless, drop multiplications by 1, turn compares against 0 into
 * test-null, forward stored/loaded registers to eliminate redundant
 * memory accesses, and remove no-op or mutually-cancelling moves.
 * Removed instructions are unlinked in place via last_ins->next.
 */
static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *last_ins = NULL;
	ins = bb->code;

	while (ins) {

		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we cant do it always */
			if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
				ins->opcode = CEE_XOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
			}
			break;
		case OP_MUL_IMM: 
			/* remove unnecessary multiplication with 1 */
			if (ins->inst_imm == 1) {
				if (ins->dreg != ins->sreg1) {
					ins->opcode = OP_MOVE;
				} else {
					/* dreg == sreg1: pure no-op, unlink it.
					 * NOTE(review): assumes last_ins != NULL here — a
					 * MUL_IMM as the first instruction of the bb would
					 * crash; presumably that cannot occur at this stage. */
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				}
			}
			break;
		case OP_COMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0) 
			 * --> 
			 * OP_X86_TEST_NULL (reg) 
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/* 
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
					ins->opcode = OP_COMPARE_IMM;
					ins->sreg1 = last_ins->sreg1;

					/* check if we can remove cmp reg,0 with test null */
					if (!ins->inst_imm)
						ins->opcode = OP_X86_TEST_NULL;
				}

			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI4_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
					 || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}

			/* 
			 * Note: reg1 must be different from the basereg in the second load
			 * Note: if reg1 = reg2 is equal then second load is removed
			 *
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_MOVE reg1, reg2
			 *
			 * NOTE(review): the "} if" below is not "else if", but it
			 * behaves identically: the store opcodes matched above can
			 * never also satisfy the load-opcode test below.
			 */
			} if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
					   || last_ins->opcode == OP_LOAD_MEMBASE) &&
			      ins->inst_basereg != last_ins->dreg &&
			      ins->inst_basereg == last_ins->inst_basereg &&
			      ins->inst_offset == last_ins->inst_offset) {

				if (ins->dreg == last_ins->dreg) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->dreg;
				}

				//g_assert_not_reached ();

#if 0
			/* 
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg
			 * -->
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_ICONST reg, imm
			 */
			} else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
						|| last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
				   ins->inst_basereg == last_ins->inst_destbasereg &&
				   ins->inst_offset == last_ins->inst_offset) {
				//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
				ins->opcode = OP_ICONST;
				ins->inst_c0 = last_ins->inst_imm;
				g_assert_not_reached (); // check this rule
#endif
			}
			break;
		case OP_LOADU1_MEMBASE:
		case OP_LOADI1_MEMBASE:
			/* 
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * CONV_I1/U1 reg1, reg2
			 */
			if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
				(last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		case OP_LOADU2_MEMBASE:
		case OP_LOADI2_MEMBASE:
			/* 
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * CONV_I2/U2 reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		case CEE_CONV_I4:
		case CEE_CONV_U4:
		case OP_MOVE:
			/*
			 * Removes:
			 *
			 * OP_MOVE reg, reg 
			 *
			 * (when last_ins is NULL the instruction is merely skipped,
			 * not unlinked — the remaining self-move is harmless)
			 */
			if (ins->dreg == ins->sreg1) {
				if (last_ins)
					last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			/* 
			 * Removes:
			 *
			 * OP_MOVE sreg, dreg 
			 * OP_MOVE dreg, sreg
			 */
			if (last_ins && last_ins->opcode == OP_MOVE &&
			    ins->sreg1 == last_ins->dreg &&
			    ins->dreg == last_ins->sreg1) {
				last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			break;
			
		case OP_X86_PUSH_MEMBASE:
			/* push offset(basereg) after a store to the same slot ->
			 * push the stored register directly */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				    ins->opcode = OP_X86_PUSH;
				    ins->sreg1 = last_ins->sreg1;
			}
			break;
		}
		last_ins = ins;
		ins = ins->next;
	}
	bb->last_ins = last_ins;
}
1451
/* Maps relative branch opcodes to the x86 condition code to emit.
 * Rows: equality/signed compares, their negated/second group, and the
 * overflow/carry conditions.  NOTE(review): presumably indexed relative
 * to the first conditional-branch opcode (CEE_BEQ) — verify against the
 * opcode tables before relying on the exact layout. */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
1458
1459 /*#include "cprop.c"*/
/*
 * mono_arch_local_regalloc:
 *
 *   Perform basic-block-local register allocation for BB.  This backend
 * has no x86-specific handling and delegates to the generic allocator.
 */
void
mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
{
	mono_local_regalloc (cfg, bb);
}
1465
/*
 * emit_float_to_int:
 *
 *   Emit code which converts the value on top of the x87 stack to an
 * integer of SIZE bytes, leaving the result in DREG (sign- or zero-
 * extended for sizes 1 and 2 according to IS_SIGNED).  The FPU control
 * word is temporarily switched to round-toward-zero (truncation), as the
 * CIL conversion opcodes require, and restored afterwards.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
	/* Save the current FPU control word on the stack */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	/* Set the rounding-control bits (0xc00 = round toward zero) and
	 * load the modified control word from the scratch slot at ESP+2 */
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		/* NOTE(review): only the low word is popped here; the control
		 * word restore below then reads the wrong slot and ESP ends up
		 * off by 4 — see the FIXME (this path is presumably unused). */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* Restore the original control word and release the scratch slot */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	/* Narrow results are widened back to 32 bits in DREG */
	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
1496
1497 static unsigned char*
1498 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1499 {
1500         int sreg = tree->sreg1;
1501         int need_touch = FALSE;
1502
1503 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1504         need_touch = TRUE;
1505 #endif
1506
1507         if (need_touch) {
1508                 guint8* br[5];
1509
1510                 /*
1511                  * Under Windows:
1512                  * If requested stack size is larger than one page,
1513                  * perform stack-touch operation
1514                  */
1515                 /*
1516                  * Generate stack probe code.
1517                  * Under Windows, it is necessary to allocate one page at a time,
1518                  * "touching" stack after each successful sub-allocation. This is
1519                  * because of the way stack growth is implemented - there is a
1520                  * guard page before the lowest stack page that is currently commited.
1521                  * Stack normally grows sequentially so OS traps access to the
1522                  * guard page and commits more pages when needed.
1523                  */
1524                 x86_test_reg_imm (code, sreg, ~0xFFF);
1525                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1526
1527                 br[2] = code; /* loop */
1528                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1529                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1530
1531                 /* 
1532                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
1533                  * that follows only initializes the last part of the area.
1534                  */
1535                 /* Same as the init code below with size==0x1000 */
1536                 if (tree->flags & MONO_INST_INIT) {
1537                         x86_push_reg (code, X86_EAX);
1538                         x86_push_reg (code, X86_ECX);
1539                         x86_push_reg (code, X86_EDI);
1540                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
1541                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
1542                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
1543                         x86_cld (code);
1544                         x86_prefix (code, X86_REP_PREFIX);
1545                         x86_stosl (code);
1546                         x86_pop_reg (code, X86_EDI);
1547                         x86_pop_reg (code, X86_ECX);
1548                         x86_pop_reg (code, X86_EAX);
1549                 }
1550
1551                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1552                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1553                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1554                 x86_patch (br[3], br[2]);
1555                 x86_test_reg_reg (code, sreg, sreg);
1556                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1557                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1558
1559                 br[1] = code; x86_jump8 (code, 0);
1560
1561                 x86_patch (br[0], code);
1562                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1563                 x86_patch (br[1], code);
1564                 x86_patch (br[4], code);
1565         }
1566         else
1567                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1568
1569         if (tree->flags & MONO_INST_INIT) {
1570                 int offset = 0;
1571                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1572                         x86_push_reg (code, X86_EAX);
1573                         offset += 4;
1574                 }
1575                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1576                         x86_push_reg (code, X86_ECX);
1577                         offset += 4;
1578                 }
1579                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1580                         x86_push_reg (code, X86_EDI);
1581                         offset += 4;
1582                 }
1583                 
1584                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1585                 if (sreg != X86_ECX)
1586                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1587                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1588                                 
1589                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1590                 x86_cld (code);
1591                 x86_prefix (code, X86_REP_PREFIX);
1592                 x86_stosl (code);
1593                 
1594                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1595                         x86_pop_reg (code, X86_EDI);
1596                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1597                         x86_pop_reg (code, X86_ECX);
1598                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1599                         x86_pop_reg (code, X86_EAX);
1600         }
1601         return code;
1602 }
1603
1604
1605 static guint8*
1606 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1607 {
1608         CallInfo *cinfo;
1609         int quad;
1610
1611         /* Move return value to the target register */
1612         switch (ins->opcode) {
1613         case CEE_CALL:
1614         case OP_CALL_REG:
1615         case OP_CALL_MEMBASE:
1616                 if (ins->dreg != X86_EAX)
1617                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1618                 break;
1619         case OP_VCALL:
1620         case OP_VCALL_REG:
1621         case OP_VCALL_MEMBASE:
1622                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
1623                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1624                         /* Pop the destination address from the stack */
1625                         x86_pop_reg (code, X86_ECX);
1626                         
1627                         for (quad = 0; quad < 2; quad ++) {
1628                                 switch (cinfo->ret.pair_storage [quad]) {
1629                                 case ArgInIReg:
1630                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1631                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1632                                         break;
1633                                 case ArgNone:
1634                                         break;
1635                                 default:
1636                                         g_assert_not_reached ();
1637                                 }
1638                         }
1639                 }
1640                 g_free (cinfo);
1641         default:
1642                 break;
1643         }
1644
1645         return code;
1646 }
1647
1648 /*
1649  * emit_tls_get:
1650  * @code: buffer to store code to
1651  * @dreg: hard register where to place the result
1652  * @tls_offset: offset info
1653  *
1654  * emit_tls_get emits in @code the native code that puts in the dreg register
1655  * the item in the thread local storage identified by tls_offset.
1656  *
1657  * Returns: a pointer to the end of the stored code
1658  */
static guint8*
emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/* 
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	/* Only the first 64 TLS slots live directly in the TEB; larger indices
	 * would need the expansion-slot path, which this code does not emit. */
	g_assert (tls_offset < 64);
	/* Load the TEB self pointer from fs:[0x18] */
	x86_prefix (code, X86_FS_PREFIX);
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* NOTE(review): TlsGetValue () contains this and-with-zero of a TEB field
	 * (presumably clearing the last-error value at TEB+0x34 for exact
	 * compatibility) — TODO confirm against a current TlsGetValue disassembly */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	/* 3600 (0xE10) is the offset of the TlsSlots array inside the TEB */
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	if (optimize_for_xen) {
		/* Under Xen, %gs points at a descriptor: load the TLS base from
		 * gs:[0], then the slot at tls_offset via a normal memory access,
		 * avoiding a direct large-offset segment access which is slow there. */
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, 0, 4);
		x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
	} else {
		/* Native Linux: the slot can be read directly through %gs */
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, tls_offset, 4);
	}
#endif
	return code;
}
1685
1686 /*
1687  * emit_load_volatile_arguments:
1688  *
1689  *  Load volatile arguments from the stack to the original input registers.
1690  * Required before a tail call.
1691  */
1692 static guint8*
1693 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
1694 {
1695         MonoMethod *method = cfg->method;
1696         MonoMethodSignature *sig;
1697         MonoInst *inst;
1698         CallInfo *cinfo;
1699         guint32 i;
1700
1701         /* FIXME: Generate intermediate code instead */
1702
1703         sig = mono_method_signature (method);
1704
1705         cinfo = get_call_info (sig, FALSE);
1706         
1707         /* This is the opposite of the code in emit_prolog */
1708
1709         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1710                 ArgInfo *ainfo = cinfo->args + i;
1711                 MonoType *arg_type;
1712                 inst = cfg->varinfo [i];
1713
1714                 if (sig->hasthis && (i == 0))
1715                         arg_type = &mono_defaults.object_class->byval_arg;
1716                 else
1717                         arg_type = sig->params [i - sig->hasthis];
1718
1719                 /*
1720                  * On x86, the arguments are either in their original stack locations, or in
1721                  * global regs.
1722                  */
1723                 if (inst->opcode == OP_REGVAR) {
1724                         g_assert (ainfo->storage == ArgOnStack);
1725                         
1726                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
1727                 }
1728         }
1729
1730         g_free (cinfo);
1731
1732         return code;
1733 }
1734
/*
 * REAL_PRINT_REG:
 *
 * Debugging helper: emits native code which printf:s the number and the
 * contents of register @reg prefixed by the string @text, preserving the
 * caller-clobbered registers EAX/ECX/EDX around the call.
 * Only /\* ... *\/ comments may appear before the continuation backslashes.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); /* save caller-clobbered regs */ \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); /* register contents, printed with %p */ \
x86_push_imm (code, reg); /* register number, printed with %d */ \
x86_push_imm (code, text " %d %p\n"); /* format string */ \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); /* pop the three printf args */ \
x86_pop_reg (code, X86_ECX); /* restore saved regs */ \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
1749
/* Byte alignment applied to loop-start basic blocks; benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
/* A block is padded as a loop start when it begins a loop body and is nested in a loop */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
1753
1754 void
1755 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1756 {
1757         MonoInst *ins;
1758         MonoCallInst *call;
1759         guint offset;
1760         guint8 *code = cfg->native_code + cfg->code_len;
1761         MonoInst *last_ins = NULL;
1762         guint last_offset = 0;
1763         int max_len, cpos;
1764
1765         if (cfg->opt & MONO_OPT_PEEPHOLE)
1766                 peephole_pass (cfg, bb);
1767
1768         if (cfg->opt & MONO_OPT_LOOP) {
1769                 int pad, align = LOOP_ALIGNMENT;
1770                 /* set alignment depending on cpu */
1771                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
1772                         pad = align - pad;
1773                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
1774                         x86_padding (code, pad);
1775                         cfg->code_len += pad;
1776                         bb->native_offset = cfg->code_len;
1777                 }
1778         }
1779
1780         if (cfg->verbose_level > 2)
1781                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1782
1783         cpos = bb->max_offset;
1784
1785         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1786                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1787                 g_assert (!cfg->compile_aot);
1788                 cpos += 6;
1789
1790                 cov->data [bb->dfn].cil_code = bb->cil_code;
1791                 /* this is not thread save, but good enough */
1792                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1793         }
1794
1795         offset = code - cfg->native_code;
1796
1797         mono_debug_open_block (cfg, bb, offset);
1798
1799         ins = bb->code;
1800         while (ins) {
1801                 offset = code - cfg->native_code;
1802
1803                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
1804
1805                 if (offset > (cfg->code_size - max_len - 16)) {
1806                         cfg->code_size *= 2;
1807                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1808                         code = cfg->native_code + offset;
1809                         mono_jit_stats.code_reallocs++;
1810                 }
1811
1812                 mono_debug_record_line_number (cfg, ins, offset);
1813
1814                 switch (ins->opcode) {
1815                 case OP_BIGMUL:
1816                         x86_mul_reg (code, ins->sreg2, TRUE);
1817                         break;
1818                 case OP_BIGMUL_UN:
1819                         x86_mul_reg (code, ins->sreg2, FALSE);
1820                         break;
1821                 case OP_X86_SETEQ_MEMBASE:
1822                 case OP_X86_SETNE_MEMBASE:
1823                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
1824                                          ins->inst_basereg, ins->inst_offset, TRUE);
1825                         break;
1826                 case OP_STOREI1_MEMBASE_IMM:
1827                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1828                         break;
1829                 case OP_STOREI2_MEMBASE_IMM:
1830                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1831                         break;
1832                 case OP_STORE_MEMBASE_IMM:
1833                 case OP_STOREI4_MEMBASE_IMM:
1834                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1835                         break;
1836                 case OP_STOREI1_MEMBASE_REG:
1837                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1838                         break;
1839                 case OP_STOREI2_MEMBASE_REG:
1840                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1841                         break;
1842                 case OP_STORE_MEMBASE_REG:
1843                 case OP_STOREI4_MEMBASE_REG:
1844                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1845                         break;
1846                 case CEE_LDIND_I:
1847                 case CEE_LDIND_I4:
1848                 case CEE_LDIND_U4:
1849                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1850                         break;
1851                 case OP_LOADU4_MEM:
1852                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1853                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1854                         break;
1855                 case OP_LOAD_MEMBASE:
1856                 case OP_LOADI4_MEMBASE:
1857                 case OP_LOADU4_MEMBASE:
1858                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1859                         break;
1860                 case OP_LOADU1_MEMBASE:
1861                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1862                         break;
1863                 case OP_LOADI1_MEMBASE:
1864                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1865                         break;
1866                 case OP_LOADU2_MEMBASE:
1867                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1868                         break;
1869                 case OP_LOADI2_MEMBASE:
1870                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1871                         break;
1872                 case CEE_CONV_I1:
1873                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1874                         break;
1875                 case CEE_CONV_I2:
1876                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1877                         break;
1878                 case CEE_CONV_U1:
1879                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1880                         break;
1881                 case CEE_CONV_U2:
1882                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1883                         break;
1884                 case OP_COMPARE:
1885                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1886                         break;
1887                 case OP_COMPARE_IMM:
1888                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1889                         break;
1890                 case OP_X86_COMPARE_MEMBASE_REG:
1891                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1892                         break;
1893                 case OP_X86_COMPARE_MEMBASE_IMM:
1894                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1895                         break;
1896                 case OP_X86_COMPARE_MEMBASE8_IMM:
1897                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1898                         break;
1899                 case OP_X86_COMPARE_REG_MEMBASE:
1900                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
1901                         break;
1902                 case OP_X86_COMPARE_MEM_IMM:
1903                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
1904                         break;
1905                 case OP_X86_TEST_NULL:
1906                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
1907                         break;
1908                 case OP_X86_ADD_MEMBASE_IMM:
1909                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1910                         break;
1911                 case OP_X86_ADD_MEMBASE:
1912                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
1913                         break;
1914                 case OP_X86_SUB_MEMBASE_IMM:
1915                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1916                         break;
1917                 case OP_X86_SUB_MEMBASE:
1918                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
1919                         break;
1920                 case OP_X86_AND_MEMBASE_IMM:
1921                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1922                         break;
1923                 case OP_X86_OR_MEMBASE_IMM:
1924                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1925                         break;
1926                 case OP_X86_XOR_MEMBASE_IMM:
1927                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1928                         break;
1929                 case OP_X86_INC_MEMBASE:
1930                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
1931                         break;
1932                 case OP_X86_INC_REG:
1933                         x86_inc_reg (code, ins->dreg);
1934                         break;
1935                 case OP_X86_DEC_MEMBASE:
1936                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
1937                         break;
1938                 case OP_X86_DEC_REG:
1939                         x86_dec_reg (code, ins->dreg);
1940                         break;
1941                 case OP_X86_MUL_MEMBASE:
1942                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
1943                         break;
1944                 case CEE_BREAK:
1945                         x86_breakpoint (code);
1946                         break;
1947                 case OP_ADDCC:
1948                 case CEE_ADD:
1949                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
1950                         break;
1951                 case OP_ADC:
1952                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
1953                         break;
1954                 case OP_ADDCC_IMM:
1955                 case OP_ADD_IMM:
1956                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
1957                         break;
1958                 case OP_ADC_IMM:
1959                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
1960                         break;
1961                 case OP_SUBCC:
1962                 case CEE_SUB:
1963                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
1964                         break;
1965                 case OP_SBB:
1966                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
1967                         break;
1968                 case OP_SUBCC_IMM:
1969                 case OP_SUB_IMM:
1970                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
1971                         break;
1972                 case OP_SBB_IMM:
1973                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
1974                         break;
1975                 case CEE_AND:
1976                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
1977                         break;
1978                 case OP_AND_IMM:
1979                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
1980                         break;
1981                 case CEE_DIV:
1982                         x86_cdq (code);
1983                         x86_div_reg (code, ins->sreg2, TRUE);
1984                         break;
1985                 case CEE_DIV_UN:
1986                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1987                         x86_div_reg (code, ins->sreg2, FALSE);
1988                         break;
1989                 case OP_DIV_IMM:
1990                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1991                         x86_cdq (code);
1992                         x86_div_reg (code, ins->sreg2, TRUE);
1993                         break;
1994                 case CEE_REM:
1995                         x86_cdq (code);
1996                         x86_div_reg (code, ins->sreg2, TRUE);
1997                         break;
1998                 case CEE_REM_UN:
1999                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2000                         x86_div_reg (code, ins->sreg2, FALSE);
2001                         break;
2002                 case OP_REM_IMM:
2003                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2004                         x86_cdq (code);
2005                         x86_div_reg (code, ins->sreg2, TRUE);
2006                         break;
2007                 case CEE_OR:
2008                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2009                         break;
2010                 case OP_OR_IMM:
2011                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2012                         break;
2013                 case CEE_XOR:
2014                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2015                         break;
2016                 case OP_XOR_IMM:
2017                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2018                         break;
2019                 case CEE_SHL:
2020                         g_assert (ins->sreg2 == X86_ECX);
2021                         x86_shift_reg (code, X86_SHL, ins->dreg);
2022                         break;
2023                 case CEE_SHR:
2024                         g_assert (ins->sreg2 == X86_ECX);
2025                         x86_shift_reg (code, X86_SAR, ins->dreg);
2026                         break;
2027                 case OP_SHR_IMM:
2028                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2029                         break;
2030                 case OP_SHR_UN_IMM:
2031                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2032                         break;
2033                 case CEE_SHR_UN:
2034                         g_assert (ins->sreg2 == X86_ECX);
2035                         x86_shift_reg (code, X86_SHR, ins->dreg);
2036                         break;
2037                 case OP_SHL_IMM:
2038                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2039                         break;
2040                 case OP_LSHL: {
2041                         guint8 *jump_to_end;
2042
2043                         /* handle shifts below 32 bits */
2044                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2045                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2046
2047                         x86_test_reg_imm (code, X86_ECX, 32);
2048                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2049
2050                         /* handle shift over 32 bit */
2051                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2052                         x86_clear_reg (code, ins->sreg1);
2053                         
2054                         x86_patch (jump_to_end, code);
2055                         }
2056                         break;
2057                 case OP_LSHR: {
2058                         guint8 *jump_to_end;
2059
2060                         /* handle shifts below 32 bits */
2061                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2062                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2063
2064                         x86_test_reg_imm (code, X86_ECX, 32);
2065                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2066
2067                         /* handle shifts over 31 bits */
2068                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2069                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2070                         
2071                         x86_patch (jump_to_end, code);
2072                         }
2073                         break;
2074                 case OP_LSHR_UN: {
2075                         guint8 *jump_to_end;
2076
2077                         /* handle shifts below 32 bits */
2078                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2079                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2080
2081                         x86_test_reg_imm (code, X86_ECX, 32);
2082                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2083
2084                         /* handle shifts over 31 bits */
2085                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2086                         x86_clear_reg (code, ins->backend.reg3);
2087                         
2088                         x86_patch (jump_to_end, code);
2089                         }
2090                         break;
2091                 case OP_LSHL_IMM:
2092                         if (ins->inst_imm >= 32) {
2093                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2094                                 x86_clear_reg (code, ins->sreg1);
2095                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2096                         } else {
2097                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2098                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2099                         }
2100                         break;
2101                 case OP_LSHR_IMM:
2102                         if (ins->inst_imm >= 32) {
2103                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2104                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2105                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2106                         } else {
2107                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2108                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2109                         }
2110                         break;
2111                 case OP_LSHR_UN_IMM:
2112                         if (ins->inst_imm >= 32) {
2113                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2114                                 x86_clear_reg (code, ins->backend.reg3);
2115                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2116                         } else {
2117                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2118                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2119                         }
2120                         break;
2121                 case CEE_NOT:
2122                         x86_not_reg (code, ins->sreg1);
2123                         break;
2124                 case CEE_NEG:
2125                         x86_neg_reg (code, ins->sreg1);
2126                         break;
2127                 case OP_SEXT_I1:
2128                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2129                         break;
2130                 case OP_SEXT_I2:
2131                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2132                         break;
2133                 case CEE_MUL:
2134                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2135                         break;
2136                 case OP_MUL_IMM:
2137                         switch (ins->inst_imm) {
2138                         case 2:
2139                                 /* MOV r1, r2 */
2140                                 /* ADD r1, r1 */
2141                                 if (ins->dreg != ins->sreg1)
2142                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2143                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2144                                 break;
2145                         case 3:
2146                                 /* LEA r1, [r2 + r2*2] */
2147                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2148                                 break;
2149                         case 5:
2150                                 /* LEA r1, [r2 + r2*4] */
2151                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2152                                 break;
2153                         case 6:
2154                                 /* LEA r1, [r2 + r2*2] */
2155                                 /* ADD r1, r1          */
2156                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2157                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2158                                 break;
2159                         case 9:
2160                                 /* LEA r1, [r2 + r2*8] */
2161                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2162                                 break;
2163                         case 10:
2164                                 /* LEA r1, [r2 + r2*4] */
2165                                 /* ADD r1, r1          */
2166                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2167                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2168                                 break;
2169                         case 12:
2170                                 /* LEA r1, [r2 + r2*2] */
2171                                 /* SHL r1, 2           */
2172                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2173                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2174                                 break;
2175                         case 25:
2176                                 /* LEA r1, [r2 + r2*4] */
2177                                 /* LEA r1, [r1 + r1*4] */
2178                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2179                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2180                                 break;
2181                         case 100:
2182                                 /* LEA r1, [r2 + r2*4] */
2183                                 /* SHL r1, 2           */
2184                                 /* LEA r1, [r1 + r1*4] */
2185                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2186                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2187                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2188                                 break;
2189                         default:
2190                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2191                                 break;
2192                         }
2193                         break;
2194                 case CEE_MUL_OVF:
2195                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2196                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2197                         break;
2198                 case CEE_MUL_OVF_UN: {
2199                         /* the mul operation and the exception check should most likely be split */
2200                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2201                         /*g_assert (ins->sreg2 == X86_EAX);
2202                         g_assert (ins->dreg == X86_EAX);*/
2203                         if (ins->sreg2 == X86_EAX) {
2204                                 non_eax_reg = ins->sreg1;
2205                         } else if (ins->sreg1 == X86_EAX) {
2206                                 non_eax_reg = ins->sreg2;
2207                         } else {
2208                                 /* no need to save since we're going to store to it anyway */
2209                                 if (ins->dreg != X86_EAX) {
2210                                         saved_eax = TRUE;
2211                                         x86_push_reg (code, X86_EAX);
2212                                 }
2213                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2214                                 non_eax_reg = ins->sreg2;
2215                         }
2216                         if (ins->dreg == X86_EDX) {
2217                                 if (!saved_eax) {
2218                                         saved_eax = TRUE;
2219                                         x86_push_reg (code, X86_EAX);
2220                                 }
2221                         } else if (ins->dreg != X86_EAX) {
2222                                 saved_edx = TRUE;
2223                                 x86_push_reg (code, X86_EDX);
2224                         }
2225                         x86_mul_reg (code, non_eax_reg, FALSE);
2226                         /* save before the check since pop and mov don't change the flags */
2227                         if (ins->dreg != X86_EAX)
2228                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2229                         if (saved_edx)
2230                                 x86_pop_reg (code, X86_EDX);
2231                         if (saved_eax)
2232                                 x86_pop_reg (code, X86_EAX);
2233                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2234                         break;
2235                 }
2236                 case OP_ICONST:
2237                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2238                         break;
2239                 case OP_AOTCONST:
2240                         g_assert_not_reached ();
2241                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2242                         x86_mov_reg_imm (code, ins->dreg, 0);
2243                         break;
2244                 case OP_LOAD_GOTADDR:
2245                         x86_call_imm (code, 0);
2246                         /* 
2247                          * The patch needs to point to the pop, since the GOT offset needs 
2248                          * to be added to that address.
2249                          */
2250                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2251                         x86_pop_reg (code, ins->dreg);
2252                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2253                         break;
2254                 case OP_GOT_ENTRY:
2255                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2256                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2257                         break;
2258                 case OP_X86_PUSH_GOT_ENTRY:
2259                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2260                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2261                         break;
2262                 case CEE_CONV_I4:
2263                 case OP_MOVE:
2264                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2265                         break;
2266                 case CEE_CONV_U4:
2267                         g_assert_not_reached ();
2268                 case CEE_JMP: {
2269                         /*
2270                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2271                          * Keep in sync with the code in emit_epilog.
2272                          */
2273                         int pos = 0;
2274
2275                         /* FIXME: no tracing support... */
2276                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2277                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2278                         /* reset offset to make max_len work */
2279                         offset = code - cfg->native_code;
2280
2281                         g_assert (!cfg->method->save_lmf);
2282
2283                         code = emit_load_volatile_arguments (cfg, code);
2284
2285                         if (cfg->used_int_regs & (1 << X86_EBX))
2286                                 pos -= 4;
2287                         if (cfg->used_int_regs & (1 << X86_EDI))
2288                                 pos -= 4;
2289                         if (cfg->used_int_regs & (1 << X86_ESI))
2290                                 pos -= 4;
2291                         if (pos)
2292                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2293         
2294                         if (cfg->used_int_regs & (1 << X86_ESI))
2295                                 x86_pop_reg (code, X86_ESI);
2296                         if (cfg->used_int_regs & (1 << X86_EDI))
2297                                 x86_pop_reg (code, X86_EDI);
2298                         if (cfg->used_int_regs & (1 << X86_EBX))
2299                                 x86_pop_reg (code, X86_EBX);
2300         
2301                         /* restore ESP/EBP */
2302                         x86_leave (code);
2303                         offset = code - cfg->native_code;
2304                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2305                         x86_jump32 (code, 0);
2306                         break;
2307                 }
2308                 case OP_CHECK_THIS:
2309                         /* ensure ins->sreg1 is not NULL
2310                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2311                          * cmp DWORD PTR [eax], 0
2312                          */
2313                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2314                         break;
2315                 case OP_ARGLIST: {
2316                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2317                         x86_push_reg (code, hreg);
2318                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2319                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2320                         x86_pop_reg (code, hreg);
2321                         break;
2322                 }
2323                 case OP_FCALL:
2324                 case OP_LCALL:
2325                 case OP_VCALL:
2326                 case OP_VOIDCALL:
2327                 case CEE_CALL:
2328                         call = (MonoCallInst*)ins;
2329                         if (ins->flags & MONO_INST_HAS_METHOD)
2330                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2331                         else
2332                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2333                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2334                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2335                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2336                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2337                                  * smart enough to do that optimization yet
2338                                  *
2339                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2340                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2341                                  * speedup (most likely from locality benefits). People with other processors should
2342                                  * check on theirs to see what happens.
2343                                  */
2344                                 if (call->stack_usage == 4) {
2345                                         /* we want to use registers that won't get used soon, so use
2346                                          * ecx, as eax will get allocated first. edx is used by long calls,
2347                                          * so we can't use that.
2348                                          */
2349                                         
2350                                         x86_pop_reg (code, X86_ECX);
2351                                 } else {
2352                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2353                                 }
2354                         }
2355                         code = emit_move_return_value (cfg, ins, code);
2356                         break;
2357                 case OP_FCALL_REG:
2358                 case OP_LCALL_REG:
2359                 case OP_VCALL_REG:
2360                 case OP_VOIDCALL_REG:
2361                 case OP_CALL_REG:
2362                         call = (MonoCallInst*)ins;
2363                         x86_call_reg (code, ins->sreg1);
2364                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2365                                 if (call->stack_usage == 4)
2366                                         x86_pop_reg (code, X86_ECX);
2367                                 else
2368                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2369                         }
2370                         code = emit_move_return_value (cfg, ins, code);
2371                         break;
2372                 case OP_FCALL_MEMBASE:
2373                 case OP_LCALL_MEMBASE:
2374                 case OP_VCALL_MEMBASE:
2375                 case OP_VOIDCALL_MEMBASE:
2376                 case OP_CALL_MEMBASE:
2377                         call = (MonoCallInst*)ins;
2378                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2379                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2380                                 if (call->stack_usage == 4)
2381                                         x86_pop_reg (code, X86_ECX);
2382                                 else
2383                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2384                         }
2385                         code = emit_move_return_value (cfg, ins, code);
2386                         break;
2387                 case OP_OUTARG:
2388                 case OP_X86_PUSH:
2389                         x86_push_reg (code, ins->sreg1);
2390                         break;
2391                 case OP_X86_PUSH_IMM:
2392                         x86_push_imm (code, ins->inst_imm);
2393                         break;
2394                 case OP_X86_PUSH_MEMBASE:
2395                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2396                         break;
2397                 case OP_X86_PUSH_OBJ: 
2398                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2399                         x86_push_reg (code, X86_EDI);
2400                         x86_push_reg (code, X86_ESI);
2401                         x86_push_reg (code, X86_ECX);
2402                         if (ins->inst_offset)
2403                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2404                         else
2405                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2406                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2407                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2408                         x86_cld (code);
2409                         x86_prefix (code, X86_REP_PREFIX);
2410                         x86_movsd (code);
2411                         x86_pop_reg (code, X86_ECX);
2412                         x86_pop_reg (code, X86_ESI);
2413                         x86_pop_reg (code, X86_EDI);
2414                         break;
2415                 case OP_X86_LEA:
2416                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2417                         break;
2418                 case OP_X86_LEA_MEMBASE:
2419                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2420                         break;
2421                 case OP_X86_XCHG:
2422                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2423                         break;
2424                 case OP_LOCALLOC:
2425                         /* keep alignment */
2426                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2427                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2428                         code = mono_emit_stack_alloc (code, ins);
2429                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2430                         break;
2431                 case CEE_RET:
2432                         x86_ret (code);
2433                         break;
2434                 case CEE_THROW: {
2435                         x86_push_reg (code, ins->sreg1);
2436                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2437                                                           (gpointer)"mono_arch_throw_exception");
2438                         break;
2439                 }
2440                 case OP_RETHROW: {
2441                         x86_push_reg (code, ins->sreg1);
2442                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2443                                                           (gpointer)"mono_arch_rethrow_exception");
2444                         break;
2445                 }
2446                 case OP_CALL_HANDLER: 
2447                         /* Align stack */
2448 #ifdef __APPLE__
2449                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2450 #endif
2451                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2452                         x86_call_imm (code, 0);
2453 #ifdef __APPLE__
2454                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2455 #endif
2456                         break;
2457                 case OP_LABEL:
2458                         ins->inst_c0 = code - cfg->native_code;
2459                         break;
2460                 case CEE_BR:
2461                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2462                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2463                         //break;
2464                         if (ins->flags & MONO_INST_BRLABEL) {
2465                                 if (ins->inst_i0->inst_c0) {
2466                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2467                                 } else {
2468                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2469                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2470                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2471                                                 x86_jump8 (code, 0);
2472                                         else 
2473                                                 x86_jump32 (code, 0);
2474                                 }
2475                         } else {
2476                                 if (ins->inst_target_bb->native_offset) {
2477                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2478                                 } else {
2479                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2480                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2481                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2482                                                 x86_jump8 (code, 0);
2483                                         else 
2484                                                 x86_jump32 (code, 0);
2485                                 } 
2486                         }
2487                         break;
2488                 case OP_BR_REG:
2489                         x86_jump_reg (code, ins->sreg1);
2490                         break;
2491                 case OP_CEQ:
2492                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2493                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2494                         break;
2495                 case OP_CLT:
2496                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2497                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2498                         break;
2499                 case OP_CLT_UN:
2500                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2501                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2502                         break;
2503                 case OP_CGT:
2504                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2505                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2506                         break;
2507                 case OP_CGT_UN:
2508                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2509                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2510                         break;
2511                 case OP_CNE:
2512                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2513                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2514                         break;
2515                 case OP_COND_EXC_EQ:
2516                 case OP_COND_EXC_NE_UN:
2517                 case OP_COND_EXC_LT:
2518                 case OP_COND_EXC_LT_UN:
2519                 case OP_COND_EXC_GT:
2520                 case OP_COND_EXC_GT_UN:
2521                 case OP_COND_EXC_GE:
2522                 case OP_COND_EXC_GE_UN:
2523                 case OP_COND_EXC_LE:
2524                 case OP_COND_EXC_LE_UN:
2525                 case OP_COND_EXC_OV:
2526                 case OP_COND_EXC_NO:
2527                 case OP_COND_EXC_C:
2528                 case OP_COND_EXC_NC:
2529                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2530                         break;
2531                 case CEE_BEQ:
2532                 case CEE_BNE_UN:
2533                 case CEE_BLT:
2534                 case CEE_BLT_UN:
2535                 case CEE_BGT:
2536                 case CEE_BGT_UN:
2537                 case CEE_BGE:
2538                 case CEE_BGE_UN:
2539                 case CEE_BLE:
2540                 case CEE_BLE_UN:
2541                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2542                         break;
2543
2544                 /* floating point opcodes */
2545                 case OP_R8CONST: {
2546                         double d = *(double *)ins->inst_p0;
2547
2548                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2549                                 x86_fldz (code);
2550                         } else if (d == 1.0) {
2551                                 x86_fld1 (code);
2552                         } else {
2553                                 if (cfg->compile_aot) {
2554                                         guint32 *val = (guint32*)&d;
2555                                         x86_push_imm (code, val [1]);
2556                                         x86_push_imm (code, val [0]);
2557                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2558                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2559                                 }
2560                                 else {
2561                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2562                                         x86_fld (code, NULL, TRUE);
2563                                 }
2564                         }
2565                         break;
2566                 }
2567                 case OP_R4CONST: {
2568                         float f = *(float *)ins->inst_p0;
2569
2570                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2571                                 x86_fldz (code);
2572                         } else if (f == 1.0) {
2573                                 x86_fld1 (code);
2574                         } else {
2575                                 if (cfg->compile_aot) {
2576                                         guint32 val = *(guint32*)&f;
2577                                         x86_push_imm (code, val);
2578                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2579                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2580                                 }
2581                                 else {
2582                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2583                                         x86_fld (code, NULL, FALSE);
2584                                 }
2585                         }
2586                         break;
2587                 }
2588                 case OP_STORER8_MEMBASE_REG:
2589                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2590                         break;
2591                 case OP_LOADR8_SPILL_MEMBASE:
2592                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2593                         x86_fxch (code, 1);
2594                         break;
2595                 case OP_LOADR8_MEMBASE:
2596                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2597                         break;
2598                 case OP_STORER4_MEMBASE_REG:
2599                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2600                         break;
2601                 case OP_LOADR4_MEMBASE:
2602                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2603                         break;
2604                 case CEE_CONV_R4: /* FIXME: change precision */
2605                 case CEE_CONV_R8:
2606                         x86_push_reg (code, ins->sreg1);
2607                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2608                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2609                         break;
2610                 case OP_X86_FP_LOAD_I8:
2611                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2612                         break;
2613                 case OP_X86_FP_LOAD_I4:
2614                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2615                         break;
2616                 case OP_FCONV_TO_I1:
2617                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2618                         break;
2619                 case OP_FCONV_TO_U1:
2620                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2621                         break;
2622                 case OP_FCONV_TO_I2:
2623                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2624                         break;
2625                 case OP_FCONV_TO_U2:
2626                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2627                         break;
2628                 case OP_FCONV_TO_I4:
2629                 case OP_FCONV_TO_I:
2630                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2631                         break;
2632                 case OP_FCONV_TO_I8:
2633                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2634                         x86_fnstcw_membase(code, X86_ESP, 0);
2635                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2636                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2637                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2638                         x86_fldcw_membase (code, X86_ESP, 2);
2639                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2640                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2641                         x86_pop_reg (code, ins->dreg);
2642                         x86_pop_reg (code, ins->backend.reg3);
2643                         x86_fldcw_membase (code, X86_ESP, 0);
2644                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2645                         break;
2646                 case OP_LCONV_TO_R_UN: { 
2647                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2648                         guint8 *br;
2649
2650                         /* load 64bit integer to FP stack */
2651                         x86_push_imm (code, 0);
2652                         x86_push_reg (code, ins->sreg2);
2653                         x86_push_reg (code, ins->sreg1);
2654                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2655                         /* store as 80bit FP value */
2656                         x86_fst80_membase (code, X86_ESP, 0);
2657                         
2658                         /* test if lreg is negative */
2659                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2660                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2661         
2662                         /* add correction constant mn */
2663                         x86_fld80_mem (code, mn);
2664                         x86_fld80_membase (code, X86_ESP, 0);
2665                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2666                         x86_fst80_membase (code, X86_ESP, 0);
2667
2668                         x86_patch (br, code);
2669
2670                         x86_fld80_membase (code, X86_ESP, 0);
2671                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2672
2673                         break;
2674                 }
2675                 case OP_LCONV_TO_OVF_I: {
2676                         guint8 *br [3], *label [1];
2677                         MonoInst *tins;
2678
2679                         /* 
2680                          * Valid ints: 0xffffffff:8000000 to 00000000:0x7f000000
2681                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2682                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2683
2684                         /* If the low word top bit is set, see if we are negative */
2685                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2686                         /* We are not negative (no top bit set); check that our top word is zero */
2687                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2688                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2689                         label [0] = code;
2690
2691                         /* throw exception */
2692                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2693                         if (tins) {
2694                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2695                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2696                                         x86_jump8 (code, 0);
2697                                 else
2698                                         x86_jump32 (code, 0);
2699                         } else {
2700                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2701                                 x86_jump32 (code, 0);
2702                         }
2703         
2704         
2705                         x86_patch (br [0], code);
2706                         /* our top bit is set, check that top word is 0xffffffff */
2707                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2708                 
2709                         x86_patch (br [1], code);
2710                         /* nope, emit exception */
2711                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2712                         x86_patch (br [2], label [0]);
2713
2714                         if (ins->dreg != ins->sreg1)
2715                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2716                         break;
2717                 }
2718                 case OP_FADD:
2719                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2720                         break;
2721                 case OP_FSUB:
2722                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2723                         break;          
2724                 case OP_FMUL:
2725                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2726                         break;          
2727                 case OP_FDIV:
2728                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2729                         break;          
2730                 case OP_FNEG:
2731                         x86_fchs (code);
2732                         break;          
2733                 case OP_SIN:
2734                         x86_fsin (code);
2735                         x86_fldz (code);
2736                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2737                         break;          
2738                 case OP_COS:
2739                         x86_fcos (code);
2740                         x86_fldz (code);
2741                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2742                         break;          
2743                 case OP_ABS:
2744                         x86_fabs (code);
2745                         break;          
2746                 case OP_TAN: {
2747                         /* 
2748                          * it really doesn't make sense to inline all this code,
2749                          * it's here just to show that things may not be as simple 
2750                          * as they appear.
2751                          */
2752                         guchar *check_pos, *end_tan, *pop_jump;
2753                         x86_push_reg (code, X86_EAX);
2754                         x86_fptan (code);
2755                         x86_fnstsw (code);
2756                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2757                         check_pos = code;
2758                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2759                         x86_fstp (code, 0); /* pop the 1.0 */
2760                         end_tan = code;
2761                         x86_jump8 (code, 0);
2762                         x86_fldpi (code);
2763                         x86_fp_op (code, X86_FADD, 0);
2764                         x86_fxch (code, 1);
2765                         x86_fprem1 (code);
2766                         x86_fstsw (code);
2767                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2768                         pop_jump = code;
2769                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2770                         x86_fstp (code, 1);
2771                         x86_fptan (code);
2772                         x86_patch (pop_jump, code);
2773                         x86_fstp (code, 0); /* pop the 1.0 */
2774                         x86_patch (check_pos, code);
2775                         x86_patch (end_tan, code);
2776                         x86_fldz (code);
2777                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2778                         x86_pop_reg (code, X86_EAX);
2779                         break;
2780                 }
2781                 case OP_ATAN:
2782                         x86_fld1 (code);
2783                         x86_fpatan (code);
2784                         x86_fldz (code);
2785                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2786                         break;          
2787                 case OP_SQRT:
2788                         x86_fsqrt (code);
2789                         break;          
2790                 case OP_X86_FPOP:
2791                         x86_fstp (code, 0);
2792                         break;          
2793                 case OP_FREM: {
2794                         guint8 *l1, *l2;
2795
2796                         x86_push_reg (code, X86_EAX);
2797                         /* we need to exchange ST(0) with ST(1) */
2798                         x86_fxch (code, 1);
2799
			/* this requires a loop, because fprem sometimes
2801                          * returns a partial remainder */
2802                         l1 = code;
2803                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2804                         /* x86_fprem1 (code); */
2805                         x86_fprem (code);
2806                         x86_fnstsw (code);
2807                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2808                         l2 = code + 2;
2809                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2810
2811                         /* pop result */
2812                         x86_fstp (code, 1);
2813
2814                         x86_pop_reg (code, X86_EAX);
2815                         break;
2816                 }
2817                 case OP_FCOMPARE:
2818                         if (cfg->opt & MONO_OPT_FCMOV) {
2819                                 x86_fcomip (code, 1);
2820                                 x86_fstp (code, 0);
2821                                 break;
2822                         }
2823                         /* this overwrites EAX */
2824                         EMIT_FPCOMPARE(code);
2825                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2826                         break;
2827                 case OP_FCEQ:
2828                         if (cfg->opt & MONO_OPT_FCMOV) {
2829                                 /* zeroing the register at the start results in 
2830                                  * shorter and faster code (we can also remove the widening op)
2831                                  */
2832                                 guchar *unordered_check;
2833                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2834                                 x86_fcomip (code, 1);
2835                                 x86_fstp (code, 0);
2836                                 unordered_check = code;
2837                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2838                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2839                                 x86_patch (unordered_check, code);
2840                                 break;
2841                         }
2842                         if (ins->dreg != X86_EAX) 
2843                                 x86_push_reg (code, X86_EAX);
2844
2845                         EMIT_FPCOMPARE(code);
2846                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2847                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2848                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2849                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2850
2851                         if (ins->dreg != X86_EAX) 
2852                                 x86_pop_reg (code, X86_EAX);
2853                         break;
2854                 case OP_FCLT:
2855                 case OP_FCLT_UN:
2856                         if (cfg->opt & MONO_OPT_FCMOV) {
2857                                 /* zeroing the register at the start results in 
2858                                  * shorter and faster code (we can also remove the widening op)
2859                                  */
2860                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2861                                 x86_fcomip (code, 1);
2862                                 x86_fstp (code, 0);
2863                                 if (ins->opcode == OP_FCLT_UN) {
2864                                         guchar *unordered_check = code;
2865                                         guchar *jump_to_end;
2866                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2867                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2868                                         jump_to_end = code;
2869                                         x86_jump8 (code, 0);
2870                                         x86_patch (unordered_check, code);
2871                                         x86_inc_reg (code, ins->dreg);
2872                                         x86_patch (jump_to_end, code);
2873                                 } else {
2874                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2875                                 }
2876                                 break;
2877                         }
2878                         if (ins->dreg != X86_EAX) 
2879                                 x86_push_reg (code, X86_EAX);
2880
2881                         EMIT_FPCOMPARE(code);
2882                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2883                         if (ins->opcode == OP_FCLT_UN) {
2884                                 guchar *is_not_zero_check, *end_jump;
2885                                 is_not_zero_check = code;
2886                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2887                                 end_jump = code;
2888                                 x86_jump8 (code, 0);
2889                                 x86_patch (is_not_zero_check, code);
2890                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2891
2892                                 x86_patch (end_jump, code);
2893                         }
2894                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2895                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2896
2897                         if (ins->dreg != X86_EAX) 
2898                                 x86_pop_reg (code, X86_EAX);
2899                         break;
2900                 case OP_FCGT:
2901                 case OP_FCGT_UN:
2902                         if (cfg->opt & MONO_OPT_FCMOV) {
2903                                 /* zeroing the register at the start results in 
2904                                  * shorter and faster code (we can also remove the widening op)
2905                                  */
2906                                 guchar *unordered_check;
2907                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2908                                 x86_fcomip (code, 1);
2909                                 x86_fstp (code, 0);
2910                                 if (ins->opcode == OP_FCGT) {
2911                                         unordered_check = code;
2912                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2913                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2914                                         x86_patch (unordered_check, code);
2915                                 } else {
2916                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2917                                 }
2918                                 break;
2919                         }
2920                         if (ins->dreg != X86_EAX) 
2921                                 x86_push_reg (code, X86_EAX);
2922
2923                         EMIT_FPCOMPARE(code);
2924                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2925                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2926                         if (ins->opcode == OP_FCGT_UN) {
2927                                 guchar *is_not_zero_check, *end_jump;
2928                                 is_not_zero_check = code;
2929                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2930                                 end_jump = code;
2931                                 x86_jump8 (code, 0);
2932                                 x86_patch (is_not_zero_check, code);
2933                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2934         
2935                                 x86_patch (end_jump, code);
2936                         }
2937                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2938                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2939
2940                         if (ins->dreg != X86_EAX) 
2941                                 x86_pop_reg (code, X86_EAX);
2942                         break;
2943                 case OP_FBEQ:
2944                         if (cfg->opt & MONO_OPT_FCMOV) {
2945                                 guchar *jump = code;
2946                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2947                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2948                                 x86_patch (jump, code);
2949                                 break;
2950                         }
2951                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2952                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2953                         break;
2954                 case OP_FBNE_UN:
2955                         /* Branch if C013 != 100 */
2956                         if (cfg->opt & MONO_OPT_FCMOV) {
2957                                 /* branch if !ZF or (PF|CF) */
2958                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2959                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2960                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2961                                 break;
2962                         }
2963                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2964                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2965                         break;
2966                 case OP_FBLT:
2967                         if (cfg->opt & MONO_OPT_FCMOV) {
2968                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2969                                 break;
2970                         }
2971                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2972                         break;
2973                 case OP_FBLT_UN:
2974                         if (cfg->opt & MONO_OPT_FCMOV) {
2975                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2976                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2977                                 break;
2978                         }
2979                         if (ins->opcode == OP_FBLT_UN) {
2980                                 guchar *is_not_zero_check, *end_jump;
2981                                 is_not_zero_check = code;
2982                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2983                                 end_jump = code;
2984                                 x86_jump8 (code, 0);
2985                                 x86_patch (is_not_zero_check, code);
2986                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2987
2988                                 x86_patch (end_jump, code);
2989                         }
2990                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2991                         break;
2992                 case OP_FBGT:
2993                 case OP_FBGT_UN:
2994                         if (cfg->opt & MONO_OPT_FCMOV) {
2995                                 if (ins->opcode == OP_FBGT) {
2996                                         guchar *br1;
2997
2998                                         /* skip branch if C1=1 */
2999                                         br1 = code;
3000                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3001                                         /* branch if (C0 | C3) = 1 */
3002                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3003                                         x86_patch (br1, code);
3004                                 } else {
3005                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3006                                 }
3007                                 break;
3008                         }
3009                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3010                         if (ins->opcode == OP_FBGT_UN) {
3011                                 guchar *is_not_zero_check, *end_jump;
3012                                 is_not_zero_check = code;
3013                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3014                                 end_jump = code;
3015                                 x86_jump8 (code, 0);
3016                                 x86_patch (is_not_zero_check, code);
3017                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3018
3019                                 x86_patch (end_jump, code);
3020                         }
3021                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3022                         break;
3023                 case OP_FBGE:
3024                         /* Branch if C013 == 100 or 001 */
3025                         if (cfg->opt & MONO_OPT_FCMOV) {
3026                                 guchar *br1;
3027
3028                                 /* skip branch if C1=1 */
3029                                 br1 = code;
3030                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3031                                 /* branch if (C0 | C3) = 1 */
3032                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3033                                 x86_patch (br1, code);
3034                                 break;
3035                         }
3036                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3037                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3038                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3039                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3040                         break;
3041                 case OP_FBGE_UN:
3042                         /* Branch if C013 == 000 */
3043                         if (cfg->opt & MONO_OPT_FCMOV) {
3044                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3045                                 break;
3046                         }
3047                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3048                         break;
3049                 case OP_FBLE:
3050                         /* Branch if C013=000 or 100 */
3051                         if (cfg->opt & MONO_OPT_FCMOV) {
3052                                 guchar *br1;
3053
3054                                 /* skip branch if C1=1 */
3055                                 br1 = code;
3056                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3057                                 /* branch if C0=0 */
3058                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3059                                 x86_patch (br1, code);
3060                                 break;
3061                         }
3062                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3063                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3064                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3065                         break;
3066                 case OP_FBLE_UN:
3067                         /* Branch if C013 != 001 */
3068                         if (cfg->opt & MONO_OPT_FCMOV) {
3069                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3070                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3071                                 break;
3072                         }
3073                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3074                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3075                         break;
3076                 case CEE_CKFINITE: {
3077                         x86_push_reg (code, X86_EAX);
3078                         x86_fxam (code);
3079                         x86_fnstsw (code);
3080                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3081                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3082                         x86_pop_reg (code, X86_EAX);
3083                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3084                         break;
3085                 }
3086                 case OP_TLS_GET: {
3087                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3088                         break;
3089                 }
3090                 case OP_MEMORY_BARRIER: {
3091                         /* Not needed on x86 */
3092                         break;
3093                 }
3094                 case OP_ATOMIC_ADD_I4: {
3095                         int dreg = ins->dreg;
3096
3097                         if (dreg == ins->inst_basereg) {
3098                                 x86_push_reg (code, ins->sreg2);
3099                                 dreg = ins->sreg2;
3100                         } 
3101                         
3102                         if (dreg != ins->sreg2)
3103                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3104
3105                         x86_prefix (code, X86_LOCK_PREFIX);
3106                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3107
3108                         if (dreg != ins->dreg) {
3109                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3110                                 x86_pop_reg (code, dreg);
3111                         }
3112
3113                         break;
3114                 }
3115                 case OP_ATOMIC_ADD_NEW_I4: {
3116                         int dreg = ins->dreg;
3117
3118                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3119                         if (ins->sreg2 == dreg) {
3120                                 if (dreg == X86_EBX) {
3121                                         dreg = X86_EDI;
3122                                         if (ins->inst_basereg == X86_EDI)
3123                                                 dreg = X86_ESI;
3124                                 } else {
3125                                         dreg = X86_EBX;
3126                                         if (ins->inst_basereg == X86_EBX)
3127                                                 dreg = X86_EDI;
3128                                 }
3129                         } else if (ins->inst_basereg == dreg) {
3130                                 if (dreg == X86_EBX) {
3131                                         dreg = X86_EDI;
3132                                         if (ins->sreg2 == X86_EDI)
3133                                                 dreg = X86_ESI;
3134                                 } else {
3135                                         dreg = X86_EBX;
3136                                         if (ins->sreg2 == X86_EBX)
3137                                                 dreg = X86_EDI;
3138                                 }
3139                         }
3140
3141                         if (dreg != ins->dreg) {
3142                                 x86_push_reg (code, dreg);
3143                         }
3144
3145                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3146                         x86_prefix (code, X86_LOCK_PREFIX);
3147                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3148                         /* dreg contains the old value, add with sreg2 value */
3149                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3150                         
3151                         if (ins->dreg != dreg) {
3152                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3153                                 x86_pop_reg (code, dreg);
3154                         }
3155
3156                         break;
3157                 }
3158                 case OP_ATOMIC_EXCHANGE_I4: {
3159                         guchar *br[2];
3160                         int sreg2 = ins->sreg2;
3161                         int breg = ins->inst_basereg;
3162
3163                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3164                          * hack to overcome limits in x86 reg allocator 
3165                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3166                          */
3167                         if (ins->dreg != X86_EAX)
3168                                 x86_push_reg (code, X86_EAX);
3169                         
3170                         /* We need the EAX reg for the cmpxchg */
3171                         if (ins->sreg2 == X86_EAX) {
3172                                 x86_push_reg (code, X86_EDX);
3173                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3174                                 sreg2 = X86_EDX;
3175                         }
3176
3177                         if (breg == X86_EAX) {
3178                                 x86_push_reg (code, X86_ESI);
3179                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3180                                 breg = X86_ESI;
3181                         }
3182
3183                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3184
3185                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3186                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3187                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3188                         x86_patch (br [1], br [0]);
3189
3190                         if (breg != ins->inst_basereg)
3191                                 x86_pop_reg (code, X86_ESI);
3192
3193                         if (ins->dreg != X86_EAX) {
3194                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3195                                 x86_pop_reg (code, X86_EAX);
3196                         }
3197
3198                         if (ins->sreg2 != sreg2)
3199                                 x86_pop_reg (code, X86_EDX);
3200
3201                         break;
3202                 }
3203                 default:
3204                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3205                         g_assert_not_reached ();
3206                 }
3207
3208                 if ((code - cfg->native_code - offset) > max_len) {
3209                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3210                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3211                         g_assert_not_reached ();
3212                 }
3213                
3214                 cpos += max_len;
3215
3216                 last_ins = ins;
3217                 last_offset = offset;
3218                 
3219                 ins = ins->next;
3220         }
3221
3222         cfg->code_len = code - cfg->native_code;
3223 }
3224
void
mono_arch_register_lowlevel_calls (void)
{
	/* No arch-specific low-level helper calls need registration on x86. */
}
3229
3230 void
3231 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3232 {
3233         MonoJumpInfo *patch_info;
3234         gboolean compile_aot = !run_cctors;
3235
3236         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3237                 unsigned char *ip = patch_info->ip.i + code;
3238                 const unsigned char *target;
3239
3240                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3241
3242                 if (compile_aot) {
3243                         switch (patch_info->type) {
3244                         case MONO_PATCH_INFO_BB:
3245                         case MONO_PATCH_INFO_LABEL:
3246                                 break;
3247                         default:
3248                                 /* No need to patch these */
3249                                 continue;
3250                         }
3251                 }
3252
3253                 switch (patch_info->type) {
3254                 case MONO_PATCH_INFO_IP:
3255                         *((gconstpointer *)(ip)) = target;
3256                         break;
3257                 case MONO_PATCH_INFO_CLASS_INIT: {
3258                         guint8 *code = ip;
3259                         /* Might already been changed to a nop */
3260                         x86_call_code (code, 0);
3261                         x86_patch (ip, target);
3262                         break;
3263                 }
3264                 case MONO_PATCH_INFO_ABS:
3265                 case MONO_PATCH_INFO_METHOD:
3266                 case MONO_PATCH_INFO_METHOD_JUMP:
3267                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3268                 case MONO_PATCH_INFO_BB:
3269                 case MONO_PATCH_INFO_LABEL:
3270                         x86_patch (ip, target);
3271                         break;
3272                 case MONO_PATCH_INFO_NONE:
3273                         break;
3274                 default: {
3275                         guint32 offset = mono_arch_get_patch_offset (ip);
3276                         *((gconstpointer *)(ip + offset)) = target;
3277                         break;
3278                 }
3279                 }
3280         }
3281 }
3282
3283 guint8 *
3284 mono_arch_emit_prolog (MonoCompile *cfg)
3285 {
3286         MonoMethod *method = cfg->method;
3287         MonoBasicBlock *bb;
3288         MonoMethodSignature *sig;
3289         MonoInst *inst;
3290         int alloc_size, pos, max_offset, i;
3291         guint8 *code;
3292
3293         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3294         code = cfg->native_code = g_malloc (cfg->code_size);
3295
3296         x86_push_reg (code, X86_EBP);
3297         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3298
3299         alloc_size = cfg->stack_offset;
3300         pos = 0;
3301
3302         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3303                 /* Might need to attach the thread to the JIT */
3304                 if (lmf_tls_offset != -1) {
3305                         guint8 *buf;
3306
3307                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3308                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3309                         buf = code;
3310                         x86_branch8 (code, X86_CC_NE, 0, 0);
3311                         x86_push_imm (code, cfg->domain);
3312                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3313                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3314                         x86_patch (buf, code);
3315 #ifdef PLATFORM_WIN32
3316                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3317                         /* FIXME: Add a separate key for LMF to avoid this */
3318                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3319 #endif
3320                 } else {
3321                         g_assert (!cfg->compile_aot);
3322                         x86_push_imm (code, cfg->domain);
3323                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3324                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3325                 }
3326         }
3327
3328         if (method->save_lmf) {
3329                 pos += sizeof (MonoLMF);
3330
3331                 /* save the current IP */
3332                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3333                 x86_push_imm_template (code);
3334
3335                 /* save all caller saved regs */
3336                 x86_push_reg (code, X86_EBP);
3337                 x86_push_reg (code, X86_ESI);
3338                 x86_push_reg (code, X86_EDI);
3339                 x86_push_reg (code, X86_EBX);
3340
3341                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3342                         /*
3343                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3344                          * through the mono_lmf_addr TLS variable.
3345                          */
3346                         /* %eax = previous_lmf */
3347                         x86_prefix (code, X86_GS_PREFIX);
3348                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3349                         /* skip method_info + lmf */
3350                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3351                         /* push previous_lmf */
3352                         x86_push_reg (code, X86_EAX);
3353                         /* new lmf = ESP */
3354                         x86_prefix (code, X86_GS_PREFIX);
3355                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
3356                 } else {
3357                         /* get the address of lmf for the current thread */
3358                         /* 
3359                          * This is performance critical so we try to use some tricks to make
3360                          * it fast.
3361                          */                                                                        
3362
3363                         if (lmf_addr_tls_offset != -1) {
3364                                 /* Load lmf quicky using the GS register */
3365                                 code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
3366 #ifdef PLATFORM_WIN32
3367                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3368                                 /* FIXME: Add a separate key for LMF to avoid this */
3369                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3370 #endif
3371                         } else {
3372                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3373                         }
3374
3375                         /* Skip method info */
3376                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3377
3378                         /* push lmf */
3379                         x86_push_reg (code, X86_EAX); 
3380                         /* push *lfm (previous_lmf) */
3381                         x86_push_membase (code, X86_EAX, 0);
3382                         /* *(lmf) = ESP */
3383                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3384                 }
3385         } else {
3386
3387                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3388                         x86_push_reg (code, X86_EBX);
3389                         pos += 4;
3390                 }
3391
3392                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3393                         x86_push_reg (code, X86_EDI);
3394                         pos += 4;
3395                 }
3396
3397                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3398                         x86_push_reg (code, X86_ESI);
3399                         pos += 4;
3400                 }
3401         }
3402
3403         alloc_size -= pos;
3404
3405 #if __APPLE__
3406         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3407         {
3408                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3409                 if (tot & 4) {
3410                         tot += 4;
3411                         alloc_size += 4;
3412                 }
3413                 if (tot & 8) {
3414                         alloc_size += 8;
3415                 }
3416         }
3417 #endif
3418
3419         if (alloc_size) {
3420                 /* See mono_emit_stack_alloc */
3421 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3422                 guint32 remaining_size = alloc_size;
3423                 while (remaining_size >= 0x1000) {
3424                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3425                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3426                         remaining_size -= 0x1000;
3427                 }
3428                 if (remaining_size)
3429                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3430 #else
3431                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3432 #endif
3433         }
3434
3435 #if __APPLE_
3436         /* check the stack is aligned */
3437         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3438         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3439         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3440         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3441         x86_breakpoint (code);
3442 #endif
3443
3444         /* compute max_offset in order to use short forward jumps */
3445         max_offset = 0;
3446         if (cfg->opt & MONO_OPT_BRANCH) {
3447                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3448                         MonoInst *ins = bb->code;
3449                         bb->max_offset = max_offset;
3450
3451                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3452                                 max_offset += 6;
3453                         /* max alignment for loops */
3454                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3455                                 max_offset += LOOP_ALIGNMENT;
3456
3457                         while (ins) {
3458                                 if (ins->opcode == OP_LABEL)
3459                                         ins->inst_c1 = max_offset;
3460                                 
3461                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3462                                 ins = ins->next;
3463                         }
3464                 }
3465         }
3466
3467         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3468                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3469
3470         /* load arguments allocated to register from the stack */
3471         sig = mono_method_signature (method);
3472         pos = 0;
3473
3474         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3475                 inst = cfg->varinfo [pos];
3476                 if (inst->opcode == OP_REGVAR) {
3477                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3478                         if (cfg->verbose_level > 2)
3479                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3480                 }
3481                 pos++;
3482         }
3483
3484         cfg->code_len = code - cfg->native_code;
3485
3486         return code;
3487 }
3488
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the method epilog at the end of cfg->native_code: restore the LMF
 * (if the method saved one) or pop the callee-saved registers, load value
 * type return values into the ABI registers, tear down the frame and return,
 * popping caller-pushed arguments for stdcall signatures.
 * Grows cfg->native_code as needed and updates cfg->code_len.
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;
	
	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	/* Double the buffer until the worst-case epilog is guaranteed to fit */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with CEE_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;
		/* The prolog allocated the MonoLMF at the bottom of the frame */
		gint32 lmf_offset = -sizeof (MonoLMF);

		if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
			/*
			 * Optimized version which uses the mono_lmf TLS variable instead of indirection
			 * through the mono_lmf_addr TLS variable.
			 */
			/* reg = previous_lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* lmf = previous_lmf */
			x86_prefix (code, X86_GS_PREFIX);
			x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
		} else {
			/*
			 * Find a spare register: 64 bit returns occupy EAX:EDX, so the
			 * scratch register must not clobber either of them.
			 */
			switch (sig->ret->type) {
			case MONO_TYPE_I8:
			case MONO_TYPE_U8:
				prev_lmf_reg = X86_EDI;
				/* Mark EDI used so it is reloaded from the LMF below */
				cfg->used_int_regs |= (1 << X86_EDI);
				break;
			default:
				prev_lmf_reg = X86_EDX;
				break;
			}

			/* reg = previous_lmf */
			x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* ecx = lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);

			/* *(lmf) = previous_lmf, i.e. unlink this frame's LMF */
			x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
		}

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* No LMF: the prolog pushed used callee-saved regs below EBP, pop them back */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		/* Point ESP at the saved registers (pos is a negative offset from EBP) */
		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		/* Pop in reverse of the prolog's push order (EBX, EDI, ESI) */
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		/* The value type lives in the frame; move each 4-byte quad to its ABI location */
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	if (CALLCONV_IS_STDCALL (sig)) {
		/* stdcall: the callee pops its own arguments */
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		/* Pop the hidden return-buffer pointer pushed by the caller */
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	g_free (cinfo);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3640
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out-of-line exception throwing code at the end of the method.
 * Every MONO_PATCH_INFO_EXC patch is turned into a "push <IL offset>; push
 * <type token>; call mono_arch_throw_corlib_exception" sequence; sequences
 * for the same exception class (up to 16 distinct classes) are shared by
 * jumping into the previously emitted one.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	/* Caches of up to 16 already-emitted throw sequences, for reuse */
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Redirect the branch in the method body to the code emitted below */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse it: push this site's offset and jump into the shared sequence */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				size = 5 + 5;

				/*
				 * The offset pushed is only known after the call is emitted,
				 * so emit a placeholder push now and rewrite it below. Use the
				 * 2-byte imm8 form when the offset is known to fit.
				 */
				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				x86_push_imm (code, exc_class->type_token);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				x86_call_code (code, 0);
				/* Rewrite the placeholder push with the real offset */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				/* Pad out any bytes left over from the longer placeholder form */
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3746
3747 void
3748 mono_arch_flush_icache (guint8 *code, gint size)
3749 {
3750         /* not needed */
3751 }
3752
void
mono_arch_flush_register_windows (void)
{
	/* Register windows are a SPARC/IA-64 concept; x86 has none, so this
	 * hook is intentionally empty. */
}
3757
3758 /*
3759  * Support for fast access to the thread-local lmf structure using the GS
3760  * segment register on NPTL + kernel 2.6.x.
3761  */
3762
3763 static gboolean tls_offset_inited = FALSE;
3764
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Record the TLS keys/offsets used for fast inline access to the LMF,
 * appdomain and thread structures. Setting MONO_NO_TLS in the environment
 * disables the fast paths entirely (all offsets stay -1).
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
			/* 
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 */
			/* NOTE: tls_offset_inited is intentionally not set on this path,
			 * so the keys are re-read on every call until they stabilize. */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();
			thread_tls_offset = mono_thread_get_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
			if (thread_tls_offset >= 64)
				thread_tls_offset = -1;
#else
#if MONO_XEN_OPT
			/* Under Xen, segment-prefixed TLS access is slow; detect it at runtime */
			optimize_for_xen = access ("/proc/xen", F_OK) == 0;
#endif
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
			thread_tls_offset = mono_thread_get_tls_offset ();
#endif
		}
	}		
}
3799
3800 void
3801 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
3802 {
3803 }
3804
3805 void
3806 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3807 {
3808         MonoCallInst *call = (MonoCallInst*)inst;
3809         CallInfo *cinfo = get_call_info (inst->signature, FALSE);
3810
3811         /* add the this argument */
3812         if (this_reg != -1) {
3813                 if (cinfo->args [0].storage == ArgInIReg) {
3814                         MonoInst *this;
3815                         MONO_INST_NEW (cfg, this, OP_MOVE);
3816                         this->type = this_type;
3817                         this->sreg1 = this_reg;
3818                         this->dreg = mono_regstate_next_int (cfg->rs);
3819                         mono_bblock_add_inst (cfg->cbb, this);
3820
3821                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
3822                 }
3823                 else {
3824                         MonoInst *this;
3825                         MONO_INST_NEW (cfg, this, OP_OUTARG);
3826                         this->type = this_type;
3827                         this->sreg1 = this_reg;
3828                         mono_bblock_add_inst (cfg->cbb, this);
3829                 }
3830         }
3831
3832         if (vt_reg != -1) {
3833                 MonoInst *vtarg;
3834
3835                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3836                         /*
3837                          * The valuetype is in EAX:EDX after the call, needs to be copied to
3838                          * the stack. Save the address here, so the call instruction can
3839                          * access it.
3840                          */
3841                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
3842                         vtarg->inst_destbasereg = X86_ESP;
3843                         vtarg->inst_offset = inst->stack_usage;
3844                         vtarg->sreg1 = vt_reg;
3845                         mono_bblock_add_inst (cfg->cbb, vtarg);
3846                 }
3847                 else if (cinfo->ret.storage == ArgInIReg) {
3848                         /* The return address is passed in a register */
3849                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
3850                         vtarg->sreg1 = vt_reg;
3851                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
3852                         mono_bblock_add_inst (cfg->cbb, vtarg);
3853
3854                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
3855                 } else {
3856                         MonoInst *vtarg;
3857                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3858                         vtarg->type = STACK_MP;
3859                         vtarg->sreg1 = vt_reg;
3860                         mono_bblock_add_inst (cfg->cbb, vtarg);
3861                 }
3862         }
3863
3864         g_free (cinfo);
3865 }
3866
3867 MonoInst*
3868 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3869 {
3870         MonoInst *ins = NULL;
3871
3872         if (cmethod->klass == mono_defaults.math_class) {
3873                 if (strcmp (cmethod->name, "Sin") == 0) {
3874                         MONO_INST_NEW (cfg, ins, OP_SIN);
3875                         ins->inst_i0 = args [0];
3876                 } else if (strcmp (cmethod->name, "Cos") == 0) {
3877                         MONO_INST_NEW (cfg, ins, OP_COS);
3878                         ins->inst_i0 = args [0];
3879                 } else if (strcmp (cmethod->name, "Tan") == 0) {
3880                         MONO_INST_NEW (cfg, ins, OP_TAN);
3881                         ins->inst_i0 = args [0];
3882                 } else if (strcmp (cmethod->name, "Atan") == 0) {
3883                         MONO_INST_NEW (cfg, ins, OP_ATAN);
3884                         ins->inst_i0 = args [0];
3885                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
3886                         MONO_INST_NEW (cfg, ins, OP_SQRT);
3887                         ins->inst_i0 = args [0];
3888                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
3889                         MONO_INST_NEW (cfg, ins, OP_ABS);
3890                         ins->inst_i0 = args [0];
3891                 }
3892 #if 0
3893                 /* OP_FREM is not IEEE compatible */
3894                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
3895                         MONO_INST_NEW (cfg, ins, OP_FREM);
3896                         ins->inst_i0 = args [0];
3897                         ins->inst_i1 = args [1];
3898                 }
3899 #endif
3900         } else if (cmethod->klass == mono_defaults.thread_class &&
3901                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
3902                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
3903         } else if(cmethod->klass->image == mono_defaults.corlib &&
3904                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
3905                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
3906
3907                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3908                         MonoInst *ins_iconst;
3909
3910                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3911                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3912                         ins_iconst->inst_c0 = 1;
3913
3914                         ins->inst_i0 = args [0];
3915                         ins->inst_i1 = ins_iconst;
3916                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3917                         MonoInst *ins_iconst;
3918
3919                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3920                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3921                         ins_iconst->inst_c0 = -1;
3922
3923                         ins->inst_i0 = args [0];
3924                         ins->inst_i1 = ins_iconst;
3925                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3926                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
3927
3928                         ins->inst_i0 = args [0];
3929                         ins->inst_i1 = args [1];
3930                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3931                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3932
3933                         ins->inst_i0 = args [0];
3934                         ins->inst_i1 = args [1];
3935                 }
3936         }
3937
3938         return ins;
3939 }
3940
3941
3942 gboolean
3943 mono_arch_print_tree (MonoInst *tree, int arity)
3944 {
3945         return 0;
3946 }
3947
3948 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3949 {
3950         MonoInst* ins;
3951         
3952         if (appdomain_tls_offset == -1)
3953                 return NULL;
3954
3955         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3956         ins->inst_offset = appdomain_tls_offset;
3957         return ins;
3958 }
3959
3960 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3961 {
3962         MonoInst* ins;
3963
3964         if (thread_tls_offset == -1)
3965                 return NULL;
3966
3967         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3968         ins->inst_offset = thread_tls_offset;
3969         return ins;
3970 }
3971
3972 guint32
3973 mono_arch_get_patch_offset (guint8 *code)
3974 {
3975         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3976                 return 2;
3977         else if ((code [0] == 0xba))
3978                 return 1;
3979         else if ((code [0] == 0x68))
3980                 /* push IMM */
3981                 return 1;
3982         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3983                 /* push <OFFSET>(<REG>) */
3984                 return 2;
3985         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
3986                 /* call *<OFFSET>(<REG>) */
3987                 return 2;
3988         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
3989                 /* fldl <ADDR> */
3990                 return 2;
3991         else if ((code [0] == 0x58) && (code [1] == 0x05))
3992                 /* pop %eax; add <OFFSET>, %eax */
3993                 return 2;
3994         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
3995                 /* pop <REG>; add <OFFSET>, <REG> */
3996                 return 3;
3997         else {
3998                 g_assert_not_reached ();
3999                 return -1;
4000         }
4001 }
4002
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   Given the return address CODE of an indirect call and the saved register
 * state REGS, decode the call instruction preceding CODE and return the
 * address of the vtable/interface slot it loaded the target from, or NULL
 * when the call was direct (or unrecognized).
 */
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;

	/* 
	 * A given byte sequence can match more than one case here, so we have to be
	 * really careful about the ordering of the cases. Longer sequences
	 * come first.
	 */
	if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
		/*
		 * This is an interface call
		 * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
		 * ff 10                   call   *(%eax)
		 */
		reg = x86_modrm_rm (code [5]);
		disp = 0;
	} else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *imm8(<REG>): mod=1, opcode extension /2 */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *imm32(<REG>): mod=2, opcode extension /2 */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* Direct call: no slot to return */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	/* Slot address = value of the base register at the call + displacement */
	return (gpointer*)(((gint32)(regs [reg])) + disp);
}
4055
4056 gpointer* 
4057 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
4058 {
4059         guint8 reg = 0;
4060         gint32 disp = 0;
4061
4062         code -= 7;
4063         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
4064                 reg = x86_modrm_rm (code [1]);
4065                 disp = code [4];
4066
4067                 if (reg == X86_EAX)
4068                         return NULL;
4069                 else
4070                         return (gpointer*)(((gint32)(regs [reg])) + disp);
4071         }
4072
4073         return NULL;
4074 }