New test.
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #include <unistd.h>
15
16 #include <mono/metadata/appdomain.h>
17 #include <mono/metadata/debug-helpers.h>
18 #include <mono/metadata/threads.h>
19 #include <mono/metadata/profiler-private.h>
20 #include <mono/utils/mono-math.h>
21
22 #include "trace.h"
23 #include "mini-x86.h"
24 #include "inssel.h"
25 #include "cpu-x86.h"
26
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
/* NOTE(review): presumably used to avoid instruction sequences that are
 * slow when virtualized under Xen; confirm at the use sites (not visible
 * in this chunk). When the option is compiled out it folds to 0. */
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#ifdef PLATFORM_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif

/* Round VAL up to the next multiple of ALIGN (ALIGN must be a power of two) */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

/* Offset from the frame pointer to the first incoming argument:
 * saved EBP (4 bytes) + return address (4 bytes) */
#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

#define NOT_IMPLEMENTED g_assert_not_reached ()
57
58 const char*
59 mono_arch_regname (int reg) {
60         switch (reg) {
61         case X86_EAX: return "%eax";
62         case X86_EBX: return "%ebx";
63         case X86_ECX: return "%ecx";
64         case X86_EDX: return "%edx";
65         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
66         case X86_EDI: return "%edi";
67         case X86_ESI: return "%esi";
68         }
69         return "unknown";
70 }
71
/*
 * mono_arch_fregname:
 *
 *   The x86 backend does not give individual names to the fp stack
 * slots, so every floating point register query yields "unknown".
 */
const char*
mono_arch_fregname (int reg) {
	(void) reg; /* unused: no named fp registers */
	return "unknown";
}
76
/* Where a call argument or return value lives */
typedef enum {
	ArgInIReg,		/* in an integer register */
	ArgInFloatSSEReg,	/* in an SSE register, single precision */
	ArgInDoubleSSEReg,	/* in an SSE register, double precision */
	ArgOnStack,		/* in the stack frame */
	ArgValuetypeInReg,	/* small struct in registers, see pair_storage */
	ArgOnFloatFpStack,	/* on the x87 fp stack, single precision */
	ArgOnDoubleFpStack,	/* on the x87 fp stack, double precision */
	ArgNone			/* no value (void return / unused slot) */
} ArgStorage;

/* Placement of a single argument or return value */
typedef struct {
	gint16 offset;		/* stack offset, used when storage == ArgOnStack */
	gint8  reg;		/* register number, used for register storages */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];	/* placement of the low/high halves */
	gint8 pair_regs [2];		/* registers for halves placed in regs */
} ArgInfo;

/* Full description of a call site computed by get_call_info () */
typedef struct {
	int nargs;
	guint32 stack_usage;		/* total stack space taken by arguments */
	guint32 reg_usage;		/* number of integer registers used */
	guint32 freg_usage;		/* number of float registers used */
	gboolean need_stack_align;	/* TRUE when alignment padding is needed (Apple) */
	guint32 stack_align_amount;	/* amount of padding added for alignment */
	ArgInfo ret;
	ArgInfo sig_cookie;		/* placement of the vararg signature cookie */
	ArgInfo args [1];		/* variable length, allocated with the struct */
} CallInfo;
109
/* No arguments are passed in registers by the default x86 calling
 * conventions: everything goes on the stack. */
#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

/* Placeholder table, never indexed since PARAM_REGS is 0 */
static X86_Reg_No param_regs [] = { 0 };

#ifdef PLATFORM_WIN32
/* Registers used to return small structs from pinvoke calls on win32 */
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
119
120 static void inline
121 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
122 {
123     ainfo->offset = *stack_size;
124
125     if (*gr >= PARAM_REGS) {
126                 ainfo->storage = ArgOnStack;
127                 (*stack_size) += sizeof (gpointer);
128     }
129     else {
130                 ainfo->storage = ArgInIReg;
131                 ainfo->reg = param_regs [*gr];
132                 (*gr) ++;
133     }
134 }
135
136 static void inline
137 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
138 {
139         ainfo->offset = *stack_size;
140
141         g_assert (PARAM_REGS == 0);
142         
143         ainfo->storage = ArgOnStack;
144         (*stack_size) += sizeof (gpointer) * 2;
145 }
146
147 static void inline
148 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
149 {
150     ainfo->offset = *stack_size;
151
152     if (*gr >= FLOAT_PARAM_REGS) {
153                 ainfo->storage = ArgOnStack;
154                 (*stack_size) += is_double ? 8 : 4;
155     }
156     else {
157                 /* A double register */
158                 if (is_double)
159                         ainfo->storage = ArgInDoubleSSEReg;
160                 else
161                         ainfo->storage = ArgInFloatSSEReg;
162                 ainfo->reg = *gr;
163                 (*gr) += 1;
164     }
165 }
166
167
/*
 * add_valuetype:
 *
 *   Compute the calling convention placement of a valuetype argument or
 * return value of type TYPE, storing the result in AINFO.  On win32,
 * small pinvoke return structs are passed back in registers or on the
 * fp stack; everything else is passed on the stack.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* Native and managed layouts can differ, so pick the right size */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* Structs of size 1/2/4/8 come back in return_regs [0]
		 * (and return_regs [1] for the high half of 8 byte ones) */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* Default: passed on the stack, rounded up to pointer size */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
223
/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386 
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 *
 * The result is allocated with g_malloc0 () and must be freed by the
 * caller with g_free ().  The is_pinvoke argument is currently unused.
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	/* CallInfo already embeds one ArgInfo, allocate room for the rest */
	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mono_type_get_underlying_type (sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			/* NOTE(review): only EAX is recorded although 64 bit
			 * values also use EDX for the high word; presumably
			 * the consumers handle that implicitly — confirm. */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			/* Generic reference types return like objects */
			if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			;
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	/* Vararg call with no fixed arguments: cookie goes first */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/* 
			 * Prevent implicit arguments + the sig cookie from being passed 
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		/* Byref arguments are passed as pointers regardless of type */
		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	/* Vararg call where the sentinel is past the last argument:
	 * the cookie still has to be emitted */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

#if defined(__APPLE__)
	/* The Apple ABI requires 16 byte stack alignment at call sites */
	if ((stack_size % 16) != 0) { 
		cinfo->need_stack_align = TRUE;
		stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
	}
#endif

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
420
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries. 
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;		/* saved EBP + return address (ARGS_OFFSET) */
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	/* Account for the hidden pointer to a struct return value */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* Entry 0 describes the implicit arguments as a whole;
	 * entry k + 1 describes parameter k. */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else {
			int ialign;
			size = mono_type_stack_size (csig->params [k], &ialign);
			align = ialign;
		}

		/* ignore alignment for now */
		align = 1;

		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);	
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* Pad the frame up to the architecture frame alignment */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
488
/*
 * Machine code for a small cdecl helper with the CpuidFunc signature
 * below: it executes the CPUID instruction with the requested function
 * id and stores the four result registers through the given pointers.
 * It is copied into executable memory at runtime (see cpuid ()) to get
 * around data-execution prevention.
 */
static const guchar cpuid_impl [] = {
	0x55,                		/* push   %ebp */
	0x89, 0xe5,                	/* mov    %esp,%ebp */
	0x53,                		/* push   %ebx */
	0x8b, 0x45, 0x08,             	/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                	/* cpuid   */
	0x50,                		/* push   %eax */
	0x8b, 0x45, 0x10,             	/* mov    0x10(%ebp),%eax */
	0x89, 0x18,                	/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,             	/* mov    0x14(%ebp),%eax */
	0x89, 0x08,                	/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,             	/* mov    0x18(%ebp),%eax */
	0x89, 0x10,                	/* mov    %edx,(%eax) */
	0x58,                		/* pop    %eax */
	0x8b, 0x55, 0x0c,             	/* mov    0xc(%ebp),%edx */
	0x89, 0x02,                	/* mov    %eax,(%edx) */
	0x5b,                		/* pop    %ebx */
	0xc9,                		/* leave   */
	0xc3,                		/* ret     */
};

/* Signature of the helper above */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
511
/*
 * cpuid:
 *
 *   Execute the CPUID instruction with function number ID, storing the
 * resulting register values through the four pointers.  Returns 1 on
 * success, or 0 when the processor does not support CPUID.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	/* CPUID is supported iff bit 21 (the ID flag) of EFLAGS can be
	 * toggled: flip it and check whether the change sticks. */
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	/* Same ID-flag probe using MSVC inline assembly */
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
571
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	/* Read the x87 control word, switch the precision control field
	 * to 53 bit (double) precision and write it back. */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	/* Re-read so fpcw reflects the control word actually in effect */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	/* Same adjustment through the MSVC runtime */
	_control87 (_PC_53, MCW_PC);
#endif
}
591
592 /*
593  * This function returns the optimizations supported on this cpu.
594  */
595 guint32
596 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
597 {
598         int eax, ebx, ecx, edx;
599         guint32 opts = 0;
600         
601         *exclude_mask = 0;
602         /* Feature Flags function, flags returned in EDX. */
603         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
604                 if (edx & (1 << 15)) {
605                         opts |= MONO_OPT_CMOV;
606                         if (edx & 1)
607                                 opts |= MONO_OPT_FCMOV;
608                         else
609                                 *exclude_mask |= MONO_OPT_FCMOV;
610                 } else
611                         *exclude_mask |= MONO_OPT_CMOV;
612         }
613         return opts;
614 }
615
/*
 * Determine whenever the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* 0xf7 /7 with mod == 3 is "idiv r32": decode which register
	 * holds the divisor */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;	/* not reached, silences the compiler */
		}

		/* idiv faults on both division by zero and on overflow; a
		 * divisor of -1 means the fault was an overflow
		 * (INT_MIN / -1 does not fit in 32 bits). */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
664
665 static gboolean
666 is_regsize_var (MonoType *t) {
667         if (t->byref)
668                 return TRUE;
669         switch (mono_type_get_underlying_type (t)->type) {
670         case MONO_TYPE_I4:
671         case MONO_TYPE_U4:
672         case MONO_TYPE_I:
673         case MONO_TYPE_U:
674         case MONO_TYPE_PTR:
675         case MONO_TYPE_FNPTR:
676                 return TRUE;
677         case MONO_TYPE_OBJECT:
678         case MONO_TYPE_STRING:
679         case MONO_TYPE_CLASS:
680         case MONO_TYPE_SZARRAY:
681         case MONO_TYPE_ARRAY:
682                 return TRUE;
683         case MONO_TYPE_GENERICINST:
684                 if (!mono_type_generic_inst_is_valuetype (t))
685                         return TRUE;
686                 return FALSE;
687         case MONO_TYPE_VALUETYPE:
688                 return FALSE;
689         }
690         return FALSE;
691 }
692
693 GList *
694 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
695 {
696         GList *vars = NULL;
697         int i;
698
699         for (i = 0; i < cfg->num_varinfo; i++) {
700                 MonoInst *ins = cfg->varinfo [i];
701                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
702
703                 /* unused vars */
704                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
705                         continue;
706
707                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
708                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
709                         continue;
710
711                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
712                  * 8bit quantities in caller saved registers on x86 */
713                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
714                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
715                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
716                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
717                         g_assert (i == vmv->idx);
718                         vars = g_list_prepend (vars, vmv);
719                 }
720         }
721
722         vars = mono_varlist_sort (cfg, vars, 0);
723
724         return vars;
725 }
726
727 GList *
728 mono_arch_get_global_int_regs (MonoCompile *cfg)
729 {
730         GList *regs = NULL;
731
732         /* we can use 3 registers for global allocation */
733         regs = g_list_prepend (regs, (gpointer)X86_EBX);
734         regs = g_list_prepend (regs, (gpointer)X86_ESI);
735         regs = g_list_prepend (regs, (gpointer)X86_EDI);
736
737         return regs;
738 }
739
740 /*
741  * mono_arch_regalloc_cost:
742  *
743  *  Return the cost, in number of memory references, of the action of 
744  * allocating the variable VMV into a register during global register
745  * allocation.
746  */
747 guint32
748 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
749 {
750         MonoInst *ins = cfg->varinfo [vmv->idx];
751
752         if (cfg->method->save_lmf)
753                 /* The register is already saved */
754                 return (ins->opcode == OP_ARG) ? 1 : 0;
755         else
756                 /* push+pop+possible load if it is an argument */
757                 return (ins->opcode == OP_ARG) ? 3 : 2;
758 }
759  
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 *
 * Lays out the stack frame: space for the LMF or callee saved registers,
 * a slot for register-returned valuetypes, the local variables, and then
 * assigns offsets/registers to the return value and each argument.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (sig, FALSE);

	cfg->frame_reg = MONO_ARCH_BASEREG;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		/* 4 bytes per callee saved register actually used */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* round the running offset up to the locals alignment */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			/* locals live below EBP, hence the negative offset */
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* Hidden return-value pointer passed as the first argument */
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		break;
	case ArgValuetypeInReg:
		/* already handled above */
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	/* Incoming arguments live above EBP at their call-site offsets */
	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->varinfo [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	/* Round the frame size up to the architecture alignment */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	cfg->stack_offset = offset;

	g_free (cinfo);
}
878
879 void
880 mono_arch_create_vars (MonoCompile *cfg)
881 {
882         MonoMethodSignature *sig;
883         CallInfo *cinfo;
884
885         sig = mono_method_signature (cfg->method);
886
887         cinfo = get_call_info (sig, FALSE);
888
889         if (cinfo->ret.storage == ArgValuetypeInReg)
890                 cfg->ret_var_is_local = TRUE;
891
892         g_free (cinfo);
893 }
894
895 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
896  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
897  */
898
899 static void
900 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
901 {
902         MonoInst *arg;
903         MonoMethodSignature *tmp_sig;
904         MonoInst *sig_arg;
905
906         /* FIXME: Add support for signature tokens to AOT */
907         cfg->disable_aot = TRUE;
908         MONO_INST_NEW (cfg, arg, OP_OUTARG);
909
910         /*
911          * mono_ArgIterator_Setup assumes the signature cookie is 
912          * passed first and all the arguments which were before it are
913          * passed on the stack after the signature. So compensate by 
914          * passing a different signature.
915          */
916         tmp_sig = mono_metadata_signature_dup (call->signature);
917         tmp_sig->param_count -= call->signature->sentinelpos;
918         tmp_sig->sentinelpos = 0;
919         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
920
921         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
922         sig_arg->inst_p0 = tmp_sig;
923
924         arg->inst_left = sig_arg;
925         arg->type = STACK_PTR;
926         /* prepend, so they get reversed */
927         arg->next = call->out_args;
928         call->out_args = arg;
929 }
930
931 /* 
932  * take the arguments and generate the arch-specific
933  * instructions to properly call the function in call.
934  * This includes pushing, moving arguments to the right register
935  * etc.
936  */
937 MonoCallInst*
938 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
939         MonoInst *arg, *in;
940         MonoMethodSignature *sig;
941         int i, n;
942         CallInfo *cinfo;
943         int sentinelpos = 0;
944
945         sig = call->signature;
946         n = sig->param_count + sig->hasthis;
947
948         cinfo = get_call_info (sig, FALSE);
949
950         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
951                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
952
953         for (i = 0; i < n; ++i) {
954                 ArgInfo *ainfo = cinfo->args + i;
955
956                 /* Emit the signature cookie just before the implicit arguments */
957                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
958                         emit_sig_cookie (cfg, call);
959                 }
960
961                 if (is_virtual && i == 0) {
962                         /* the argument will be attached to the call instrucion */
963                         in = call->args [i];
964                 } else {
965                         MonoType *t;
966
967                         if (i >= sig->hasthis)
968                                 t = sig->params [i - sig->hasthis];
969                         else
970                                 t = &mono_defaults.int_class->byval_arg;
971                         t = mono_type_get_underlying_type (t);
972
973                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
974                         in = call->args [i];
975                         arg->cil_code = in->cil_code;
976                         arg->inst_left = in;
977                         arg->type = in->type;
978                         /* prepend, so they get reversed */
979                         arg->next = call->out_args;
980                         call->out_args = arg;
981
982                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
983                                 guint32 size, align;
984
985                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
986                                         size = sizeof (MonoTypedRef);
987                                         align = sizeof (gpointer);
988                                 }
989                                 else
990                                         if (sig->pinvoke)
991                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
992                                         else {
993                                                 int ialign;
994                                                 size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
995                                                 align = ialign;
996                                         }
997                                 arg->opcode = OP_OUTARG_VT;
998                                 arg->klass = in->klass;
999                                 arg->backend.is_pinvoke = sig->pinvoke;
1000                                 arg->inst_imm = size; 
1001                         }
1002                         else {
1003                                 switch (ainfo->storage) {
1004                                 case ArgOnStack:
1005                                         arg->opcode = OP_OUTARG;
1006                                         if (!t->byref) {
1007                                                 if (t->type == MONO_TYPE_R4)
1008                                                         arg->opcode = OP_OUTARG_R4;
1009                                                 else
1010                                                         if (t->type == MONO_TYPE_R8)
1011                                                                 arg->opcode = OP_OUTARG_R8;
1012                                         }
1013                                         break;
1014                                 default:
1015                                         g_assert_not_reached ();
1016                                 }
1017                         }
1018                 }
1019         }
1020
1021         /* Handle the case where there are no implicit arguments */
1022         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
1023                 emit_sig_cookie (cfg, call);
1024         }
1025
1026         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1027                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1028                         MonoInst *zero_inst;
1029                         /*
1030                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1031                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1032                          * before calling the function. So we add a dummy instruction to represent pushing the 
1033                          * struct return address to the stack. The return address will be saved to this stack slot 
1034                          * by the code emitted in this_vret_args.
1035                          */
1036                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1037                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1038                         zero_inst->inst_p0 = 0;
1039                         arg->inst_left = zero_inst;
1040                         arg->type = STACK_PTR;
1041                         /* prepend, so they get reversed */
1042                         arg->next = call->out_args;
1043                         call->out_args = arg;
1044                 }
1045                 else
1046                         /* if the function returns a struct, the called method already does a ret $0x4 */
1047                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1048                                 cinfo->stack_usage -= 4;
1049         }
1050         
1051         call->stack_usage = cinfo->stack_usage;
1052
1053 #if defined(__APPLE__)
1054         if (cinfo->need_stack_align) {
1055                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1056                 arg->inst_c0 = cinfo->stack_align_amount;
1057                 arg->next = call->out_args;
1058                 call->out_args = arg;
1059         }
1060 #endif 
1061
1062         g_free (cinfo);
1063
1064         return call;
1065 }
1066
/*
 * Allow tracing to work with this interface (with an optional argument)
 *
 * Emits, into the buffer P, a call to the tracing function FUNC passing
 * cfg->method (and the just-pushed EBP value as a second argument).
 * Returns the updated code pointer.
 */
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;

	/* if some args are passed in registers, we need to save them here */
	x86_push_reg (code, X86_EBP);

	/* Under AOT the target address cannot be patched into the code stream,
	 * so FUNC is loaded into EAX and called indirectly; otherwise emit a
	 * patchable direct call (MONO_PATCH_INFO_ABS). */
	if (cfg->compile_aot) {
		x86_push_imm (code, cfg->method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
		x86_push_imm (code, cfg->method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	/* pop the two 4-byte values pushed above (saved EBP + method pointer) */
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);

	return code;
}
1092
/* Which return-value registers must be preserved around the epilog trace call. */
enum {
	SAVE_NONE,	/* void return: nothing to preserve */
	SAVE_STRUCT,	/* valuetype returned through a hidden pointer */
	SAVE_EAX,	/* 32-bit integer/pointer result in EAX */
	SAVE_EAX_EDX,	/* 64-bit integer result in the EAX:EDX pair */
	SAVE_FP		/* floating point result on the x87 stack */
};
1100
1101 void*
1102 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1103 {
1104         guchar *code = p;
1105         int arg_size = 0, save_mode = SAVE_NONE;
1106         MonoMethod *method = cfg->method;
1107         
1108         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1109         case MONO_TYPE_VOID:
1110                 /* special case string .ctor icall */
1111                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1112                         save_mode = SAVE_EAX;
1113                 else
1114                         save_mode = SAVE_NONE;
1115                 break;
1116         case MONO_TYPE_I8:
1117         case MONO_TYPE_U8:
1118                 save_mode = SAVE_EAX_EDX;
1119                 break;
1120         case MONO_TYPE_R4:
1121         case MONO_TYPE_R8:
1122                 save_mode = SAVE_FP;
1123                 break;
1124         case MONO_TYPE_GENERICINST:
1125                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1126                         save_mode = SAVE_EAX;
1127                         break;
1128                 }
1129                 /* Fall through */
1130         case MONO_TYPE_VALUETYPE:
1131                 save_mode = SAVE_STRUCT;
1132                 break;
1133         default:
1134                 save_mode = SAVE_EAX;
1135                 break;
1136         }
1137
1138         switch (save_mode) {
1139         case SAVE_EAX_EDX:
1140                 x86_push_reg (code, X86_EDX);
1141                 x86_push_reg (code, X86_EAX);
1142                 if (enable_arguments) {
1143                         x86_push_reg (code, X86_EDX);
1144                         x86_push_reg (code, X86_EAX);
1145                         arg_size = 8;
1146                 }
1147                 break;
1148         case SAVE_EAX:
1149                 x86_push_reg (code, X86_EAX);
1150                 if (enable_arguments) {
1151                         x86_push_reg (code, X86_EAX);
1152                         arg_size = 4;
1153                 }
1154                 break;
1155         case SAVE_FP:
1156                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1157                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1158                 if (enable_arguments) {
1159                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1160                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1161                         arg_size = 8;
1162                 }
1163                 break;
1164         case SAVE_STRUCT:
1165                 if (enable_arguments) {
1166                         x86_push_membase (code, X86_EBP, 8);
1167                         arg_size = 4;
1168                 }
1169                 break;
1170         case SAVE_NONE:
1171         default:
1172                 break;
1173         }
1174
1175         if (cfg->compile_aot) {
1176                 x86_push_imm (code, method);
1177                 x86_mov_reg_imm (code, X86_EAX, func);
1178                 x86_call_reg (code, X86_EAX);
1179         } else {
1180                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1181                 x86_push_imm (code, method);
1182                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1183                 x86_call_code (code, 0);
1184         }
1185         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1186
1187         switch (save_mode) {
1188         case SAVE_EAX_EDX:
1189                 x86_pop_reg (code, X86_EAX);
1190                 x86_pop_reg (code, X86_EDX);
1191                 break;
1192         case SAVE_EAX:
1193                 x86_pop_reg (code, X86_EAX);
1194                 break;
1195         case SAVE_FP:
1196                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1197                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1198                 break;
1199         case SAVE_NONE:
1200         default:
1201                 break;
1202         }
1203
1204         return code;
1205 }
1206
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch with condition code COND (SIGN selects the
 * signed/unsigned flavour) to the target of INS — either a label
 * (MONO_INST_BRLABEL) or a basic block. If the target has already been
 * emitted, branch directly to its known native offset; otherwise record a
 * patch and emit an 8-bit displacement when MONO_OPT_BRANCH estimates the
 * distance fits in a byte, else a 32-bit displacement.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
	        x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
	        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
	        if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
		        x86_branch8 (code, cond, 0, sign); \
                else \
	                x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
	        x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
	        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
	        if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
		        x86_branch8 (code, cond, 0, sign); \
                else \
	                x86_branch32 (code, cond, 0, sign); \
        } \
}
1231
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit a conditional branch that raises the named exception when COND
 * holds. If mono_branch_optimize_exception_target () can resolve an
 * existing throw block, branch directly to it; otherwise record an
 * MONO_PATCH_INFO_EXC patch and emit a 32-bit conditional branch.
 * (Fixed: the trailing semicolon after "while (0)" defeated the do/while(0)
 * single-statement idiom and would break unbraced if/else call sites.)
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) {                                                                             \
			mono_add_patch_info (cfg, code - cfg->native_code,   \
					MONO_PATCH_INFO_EXC, exc_name);  \
			x86_branch32 (code, cond, 0, signed);               \
		} else {	\
			EMIT_COND_BRANCH (tins, cond, signed);	\
		}			\
	} while (0)
1247
/*
 * EMIT_FPCOMPARE:
 *
 *   Compare and pop the two topmost x87 stack entries, then store the FPU
 * status word into AX so the result can be tested with integer code.
 * (Fixed: removed the trailing semicolon after "while (0)" which defeated
 * the do/while(0) single-statement idiom.)
 */
#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0)
1252
1253
1254 static guint8*
1255 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1256 {
1257         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1258         x86_call_code (code, 0);
1259
1260         return code;
1261 }
1262
/* FIXME: Add more instructions */
/* Whether INS does not depend on the CPU condition flags, so a preceding
 * ICONST 0 may safely be rewritten into a flag-clobbering XOR reg,reg
 * (see peephole_pass). */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1265
/*
 * peephole_pass:
 *
 *   Perform simple local optimizations on the linked instruction list of BB
 * in a single forward pass: xor-zeroing, mul-by-1 elimination, cmp-0 ->
 * test, and store/load forwarding. last_ins tracks the previous instruction
 * so adjacent store/load pairs can be matched; removed instructions are
 * unlinked by redirecting last_ins->next.
 */
static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *last_ins = NULL;
	ins = bb->code;

	while (ins) {

		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we cant do it always */
			if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
				ins->opcode = CEE_XOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
			}
			break;
		case OP_MUL_IMM: 
			/* remove unnecessary multiplication with 1 */
			if (ins->inst_imm == 1) {
				if (ins->dreg != ins->sreg1) {
					ins->opcode = OP_MOVE;
				} else {
					/* dreg == sreg1: a true no-op, unlink it entirely */
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				}
			}
			break;
		case OP_COMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0) 
			 * --> 
			 * OP_X86_TEST_NULL (reg) 
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/* 
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
					ins->opcode = OP_COMPARE_IMM;
					ins->sreg1 = last_ins->sreg1;

					/* check if we can remove cmp reg,0 with test null */
					if (!ins->inst_imm)
						ins->opcode = OP_X86_TEST_NULL;
				}

			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI4_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
					 || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}

			/* 
			 * Note: reg1 must be different from the basereg in the second load
			 * Note: if reg1 = reg2 is equal then second load is removed
			 *
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_MOVE reg1, reg2
			 */
			/* NOTE(review): missing 'else' before this 'if' — harmless here
			 * because last_ins cannot be both a store and a load, so the two
			 * conditions are mutually exclusive, but confirm the intent. */
			} if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
					   || last_ins->opcode == OP_LOAD_MEMBASE) &&
			      ins->inst_basereg != last_ins->dreg &&
			      ins->inst_basereg == last_ins->inst_basereg &&
			      ins->inst_offset == last_ins->inst_offset) {

				if (ins->dreg == last_ins->dreg) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->dreg;
				}

				//g_assert_not_reached ();

#if 0
			/* 
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg
			 * -->
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_ICONST reg, imm
			 */
			} else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
						|| last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
				   ins->inst_basereg == last_ins->inst_destbasereg &&
				   ins->inst_offset == last_ins->inst_offset) {
				//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
				ins->opcode = OP_ICONST;
				ins->inst_c0 = last_ins->inst_imm;
				g_assert_not_reached (); // check this rule
#endif
			}
			break;
		case OP_LOADU1_MEMBASE:
		case OP_LOADI1_MEMBASE:
			/* 
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * CONV_I2/U2 reg1, reg2
			 */
			if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
				(last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		case OP_LOADU2_MEMBASE:
		case OP_LOADI2_MEMBASE:
			/* 
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * CONV_I2/U2 reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		case CEE_CONV_I4:
		case CEE_CONV_U4:
		case OP_MOVE:
			/*
			 * Removes:
			 *
			 * OP_MOVE reg, reg 
			 */
			if (ins->dreg == ins->sreg1) {
				if (last_ins)
					last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			/* 
			 * Removes:
			 *
			 * OP_MOVE sreg, dreg 
			 * OP_MOVE dreg, sreg
			 */
			if (last_ins && last_ins->opcode == OP_MOVE &&
			    ins->sreg1 == last_ins->dreg &&
			    ins->dreg == last_ins->sreg1) {
				last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			break;
			
		case OP_X86_PUSH_MEMBASE:
			/* forward a just-stored register into the push */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				    ins->opcode = OP_X86_PUSH;
				    ins->sreg1 = last_ins->sreg1;
			}
			break;
		}
		last_ins = ins;
		ins = ins->next;
	}
	bb->last_ins = last_ins;
}
1474
/*
 * x86 condition codes used when lowering compare-and-branch / compare-and-set
 * opcodes; the three rows group equality/relational conditions, their second
 * (unsigned-path) variants, and overflow/carry checks.
 * NOTE(review): the exact opcode-to-index mapping is defined by the callers —
 * confirm against them before reordering entries.
 */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
1481
/* Per-opcode machine description table (presumably generated from the
 * cpu-x86 machine description included above — confirm). */
static const char*const * ins_spec = x86_desc;

/*#include "cprop.c"*/

/* Run the generic local register allocator over BB. */
void
mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
{
	mono_local_regalloc (cfg, bb);
}
1490
/*
 * emit_float_to_int:
 *
 *   Emit code converting the value on top of the x87 floating point stack
 * into a SIZE-byte integer in DREG, sign- or zero-extended according to
 * IS_SIGNED. The FPU control word is temporarily switched to truncation,
 * since CIL conversions truncate while the x87 default rounds to nearest.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
	/* Save the current FPU control word into a 4-byte scratch slot on the stack. */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	/* Set both rounding-control bits (0xc00) -> round toward zero, and load
	 * the modified control word from the upper half of the scratch slot. */
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		/* 64-bit: store the full quadword, but only the low half is popped. */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* Restore the original control word and release the scratch slot. */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	/* Narrow the 32-bit result down to 1 or 2 bytes with the proper extension. */
	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
1521
/*
 * mono_emit_stack_alloc:
 *
 *   Emit native code implementing localloc: subtract tree->sreg1 bytes from
 * ESP, and, if MONO_INST_INIT is set, zero-initialize the allocated area.
 * On Windows (and when signals run on an altstack) the allocation is done one
 * page at a time, touching each page, so the OS guard page mechanism can
 * commit stack memory as we go.
 *
 * NOTE: the init paths clobber sreg (it is shifted right by 2 to become a
 * dword count for REP STOSL).
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
        int sreg = tree->sreg1;
        int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
        need_touch = TRUE;
#endif

        if (need_touch) {
                guint8* br[5];

                /*
                 * Under Windows:
                 * If requested stack size is larger than one page,
                 * perform stack-touch operation
                 */
                /*
                 * Generate stack probe code.
                 * Under Windows, it is necessary to allocate one page at a time,
                 * "touching" stack after each successful sub-allocation. This is
                 * because of the way stack growth is implemented - there is a
                 * guard page before the lowest stack page that is currently commited.
                 * Stack normally grows sequentially so OS traps access to the
                 * guard page and commits more pages when needed.
                 */
                /* Sizes below one page need no probing: skip the loop entirely. */
                x86_test_reg_imm (code, sreg, ~0xFFF);
                br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

                br[2] = code; /* loop */
                x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
                /* Touch the new page so the guard-page fault commits it. */
                x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

                /* 
                 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
                 * that follows only initializes the last part of the area.
                 */
                /* Same as the init code below with size==0x1000 */
                if (tree->flags & MONO_INST_INIT) {
                        /* Save the registers REP STOSL clobbers (EAX, ECX, EDI). */
                        x86_push_reg (code, X86_EAX);
                        x86_push_reg (code, X86_ECX);
                        x86_push_reg (code, X86_EDI);
                        x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
                        x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
                        /* +12 skips the three registers just pushed above. */
                        x86_lea_membase (code, X86_EDI, X86_ESP, 12);
                        x86_cld (code);
                        x86_prefix (code, X86_REP_PREFIX);
                        x86_stosl (code);
                        x86_pop_reg (code, X86_EDI);
                        x86_pop_reg (code, X86_ECX);
                        x86_pop_reg (code, X86_EAX);
                }

                /* Loop while more than one page remains to allocate. */
                x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
                x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
                br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
                x86_patch (br[3], br[2]);
                /* Allocate the sub-page remainder, if any. */
                x86_test_reg_reg (code, sreg, sreg);
                br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
                x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

                br[1] = code; x86_jump8 (code, 0);

                /* Small-size fast path: single untouched allocation. */
                x86_patch (br[0], code);
                x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
                x86_patch (br[1], code);
                x86_patch (br[4], code);
        }
        else
                x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

        if (tree->flags & MONO_INST_INIT) {
                int offset = 0;
                /* Only save EAX/ECX/EDI if they are live as dreg/sreg; track how
                 * many pushes we did so EDI can be pointed past them. */
                if (tree->dreg != X86_EAX && sreg != X86_EAX) {
                        x86_push_reg (code, X86_EAX);
                        offset += 4;
                }
                if (tree->dreg != X86_ECX && sreg != X86_ECX) {
                        x86_push_reg (code, X86_ECX);
                        offset += 4;
                }
                if (tree->dreg != X86_EDI && sreg != X86_EDI) {
                        x86_push_reg (code, X86_EDI);
                        offset += 4;
                }
                
                /* Byte count -> dword count for REP STOSL (clobbers sreg). */
                x86_shift_reg_imm (code, X86_SHR, sreg, 2);
                if (sreg != X86_ECX)
                        x86_mov_reg_reg (code, X86_ECX, sreg, 4);
                x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
                                
                x86_lea_membase (code, X86_EDI, X86_ESP, offset);
                x86_cld (code);
                x86_prefix (code, X86_REP_PREFIX);
                x86_stosl (code);
                
                if (tree->dreg != X86_EDI && sreg != X86_EDI)
                        x86_pop_reg (code, X86_EDI);
                if (tree->dreg != X86_ECX && sreg != X86_ECX)
                        x86_pop_reg (code, X86_ECX);
                if (tree->dreg != X86_EAX && sreg != X86_EAX)
                        x86_pop_reg (code, X86_EAX);
        }
        return code;
}
1628
1629
/*
 * emit_move_return_value:
 *
 *   Emit code that moves the return value of the call instruction @ins into
 * its destination. Scalar calls copy EAX into ins->dreg; valuetype calls
 * whose result is returned in registers (ArgValuetypeInReg) store the
 * register pair through the destination address that the call sequence left
 * on the stack.
 *
 * Returns: the advanced code pointer.
 */
static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
        CallInfo *cinfo;
        int quad;

        /* Move return value to the target register */
        switch (ins->opcode) {
        case CEE_CALL:
        case OP_CALL_REG:
        case OP_CALL_MEMBASE:
                if (ins->dreg != X86_EAX)
                        x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
                break;
        case OP_VCALL:
        case OP_VCALL_REG:
        case OP_VCALL_MEMBASE:
                cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
                if (cinfo->ret.storage == ArgValuetypeInReg) {
                        /* Pop the destination address from the stack */
                        x86_pop_reg (code, X86_ECX);
                        
                        /* Store each returned quad (low/high dword) through ECX. */
                        for (quad = 0; quad < 2; quad ++) {
                                switch (cinfo->ret.pair_storage [quad]) {
                                case ArgInIReg:
                                        /* ECX holds the destination address, so it must not
                                         * also be one of the value registers. */
                                        g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
                                        x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
                                        break;
                                case ArgNone:
                                        break;
                                default:
                                        g_assert_not_reached ();
                                }
                        }
                }
                g_free (cinfo);
                /* fall through (default only breaks) */
        default:
                break;
        }

        return code;
}
1672
1673 /*
1674  * emit_tls_get:
1675  * @code: buffer to store code to
1676  * @dreg: hard register where to place the result
1677  * @tls_offset: offset info
1678  *
1679  * emit_tls_get emits in @code the native code that puts in the dreg register
1680  * the item in the thread local storage identified by tls_offset.
1681  *
1682  * Returns: a pointer to the end of the stored code
1683  */
1684 static guint8*
1685 emit_tls_get (guint8* code, int dreg, int tls_offset)
1686 {
1687 #ifdef PLATFORM_WIN32
1688         /* 
1689          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1690          * Journal and/or a disassembly of the TlsGet () function.
1691          */
1692         g_assert (tls_offset < 64);
1693         x86_prefix (code, X86_FS_PREFIX);
1694         x86_mov_reg_mem (code, dreg, 0x18, 4);
1695         /* Dunno what this does but TlsGetValue () contains it */
1696         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
1697         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
1698 #else
1699         if (optimize_for_xen) {
1700                 x86_prefix (code, X86_GS_PREFIX);
1701                 x86_mov_reg_mem (code, dreg, 0, 4);
1702                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
1703         } else {
1704                 x86_prefix (code, X86_GS_PREFIX);
1705                 x86_mov_reg_mem (code, dreg, tls_offset, 4);
1706         }
1707 #endif
1708         return code;
1709 }
1710
1711 /*
1712  * emit_load_volatile_arguments:
1713  *
1714  *  Load volatile arguments from the stack to the original input registers.
1715  * Required before a tail call.
1716  */
1717 static guint8*
1718 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
1719 {
1720         MonoMethod *method = cfg->method;
1721         MonoMethodSignature *sig;
1722         MonoInst *inst;
1723         CallInfo *cinfo;
1724         guint32 i;
1725
1726         /* FIXME: Generate intermediate code instead */
1727
1728         sig = mono_method_signature (method);
1729
1730         cinfo = get_call_info (sig, FALSE);
1731         
1732         /* This is the opposite of the code in emit_prolog */
1733
1734         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1735                 ArgInfo *ainfo = cinfo->args + i;
1736                 MonoType *arg_type;
1737                 inst = cfg->varinfo [i];
1738
1739                 if (sig->hasthis && (i == 0))
1740                         arg_type = &mono_defaults.object_class->byval_arg;
1741                 else
1742                         arg_type = sig->params [i - sig->hasthis];
1743
1744                 /*
1745                  * On x86, the arguments are either in their original stack locations, or in
1746                  * global regs.
1747                  */
1748                 if (inst->opcode == OP_REGVAR) {
1749                         g_assert (ainfo->storage == ArgOnStack);
1750                         
1751                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
1752                 }
1753         }
1754
1755         g_free (cinfo);
1756
1757         return code;
1758 }
1759
/*
 * REAL_PRINT_REG(text, reg):
 *   Debug helper: emits code that prints "<text> <reg-number> <reg-value>"
 * via printf at runtime, preserving EAX/EDX/ECX around the call. The three
 * printf arguments (value, number, format string) are cleaned off the stack
 * with the ADD ESP, 3*4.
 *
 * NOTE(review): this expands to multiple statements without a do { } while (0)
 * wrapper, so it is unsafe inside an unbraced if/else — use with care.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
1774
/* benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
/* A basic block heads a loop if it starts a loop body and is nested. */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
1778
1779 void
1780 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1781 {
1782         MonoInst *ins;
1783         MonoCallInst *call;
1784         guint offset;
1785         guint8 *code = cfg->native_code + cfg->code_len;
1786         MonoInst *last_ins = NULL;
1787         guint last_offset = 0;
1788         int max_len, cpos;
1789
1790         if (cfg->opt & MONO_OPT_PEEPHOLE)
1791                 peephole_pass (cfg, bb);
1792
1793         if (cfg->opt & MONO_OPT_LOOP) {
1794                 int pad, align = LOOP_ALIGNMENT;
1795                 /* set alignment depending on cpu */
1796                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
1797                         pad = align - pad;
1798                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
1799                         x86_padding (code, pad);
1800                         cfg->code_len += pad;
1801                         bb->native_offset = cfg->code_len;
1802                 }
1803         }
1804
1805         if (cfg->verbose_level > 2)
1806                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1807
1808         cpos = bb->max_offset;
1809
1810         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1811                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1812                 g_assert (!cfg->compile_aot);
1813                 cpos += 6;
1814
1815                 cov->data [bb->dfn].cil_code = bb->cil_code;
1816                 /* this is not thread save, but good enough */
1817                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1818         }
1819
1820         offset = code - cfg->native_code;
1821
1822         mono_debug_open_block (cfg, bb, offset);
1823
1824         ins = bb->code;
1825         while (ins) {
1826                 offset = code - cfg->native_code;
1827
1828                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
1829
1830                 if (offset > (cfg->code_size - max_len - 16)) {
1831                         cfg->code_size *= 2;
1832                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1833                         code = cfg->native_code + offset;
1834                         mono_jit_stats.code_reallocs++;
1835                 }
1836
1837                 mono_debug_record_line_number (cfg, ins, offset);
1838
1839                 switch (ins->opcode) {
1840                 case OP_BIGMUL:
1841                         x86_mul_reg (code, ins->sreg2, TRUE);
1842                         break;
1843                 case OP_BIGMUL_UN:
1844                         x86_mul_reg (code, ins->sreg2, FALSE);
1845                         break;
1846                 case OP_X86_SETEQ_MEMBASE:
1847                 case OP_X86_SETNE_MEMBASE:
1848                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
1849                                          ins->inst_basereg, ins->inst_offset, TRUE);
1850                         break;
1851                 case OP_STOREI1_MEMBASE_IMM:
1852                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1853                         break;
1854                 case OP_STOREI2_MEMBASE_IMM:
1855                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1856                         break;
1857                 case OP_STORE_MEMBASE_IMM:
1858                 case OP_STOREI4_MEMBASE_IMM:
1859                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1860                         break;
1861                 case OP_STOREI1_MEMBASE_REG:
1862                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1863                         break;
1864                 case OP_STOREI2_MEMBASE_REG:
1865                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1866                         break;
1867                 case OP_STORE_MEMBASE_REG:
1868                 case OP_STOREI4_MEMBASE_REG:
1869                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1870                         break;
1871                 case CEE_LDIND_I:
1872                 case CEE_LDIND_I4:
1873                 case CEE_LDIND_U4:
1874                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1875                         break;
1876                 case OP_LOADU4_MEM:
1877                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1878                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1879                         break;
1880                 case OP_LOAD_MEMBASE:
1881                 case OP_LOADI4_MEMBASE:
1882                 case OP_LOADU4_MEMBASE:
1883                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1884                         break;
1885                 case OP_LOADU1_MEMBASE:
1886                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1887                         break;
1888                 case OP_LOADI1_MEMBASE:
1889                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1890                         break;
1891                 case OP_LOADU2_MEMBASE:
1892                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1893                         break;
1894                 case OP_LOADI2_MEMBASE:
1895                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1896                         break;
1897                 case CEE_CONV_I1:
1898                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1899                         break;
1900                 case CEE_CONV_I2:
1901                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1902                         break;
1903                 case CEE_CONV_U1:
1904                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1905                         break;
1906                 case CEE_CONV_U2:
1907                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1908                         break;
1909                 case OP_COMPARE:
1910                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1911                         break;
1912                 case OP_COMPARE_IMM:
1913                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1914                         break;
1915                 case OP_X86_COMPARE_MEMBASE_REG:
1916                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1917                         break;
1918                 case OP_X86_COMPARE_MEMBASE_IMM:
1919                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1920                         break;
1921                 case OP_X86_COMPARE_MEMBASE8_IMM:
1922                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1923                         break;
1924                 case OP_X86_COMPARE_REG_MEMBASE:
1925                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
1926                         break;
1927                 case OP_X86_COMPARE_MEM_IMM:
1928                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
1929                         break;
1930                 case OP_X86_TEST_NULL:
1931                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
1932                         break;
1933                 case OP_X86_ADD_MEMBASE_IMM:
1934                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1935                         break;
1936                 case OP_X86_ADD_MEMBASE:
1937                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
1938                         break;
1939                 case OP_X86_SUB_MEMBASE_IMM:
1940                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1941                         break;
1942                 case OP_X86_SUB_MEMBASE:
1943                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
1944                         break;
1945                 case OP_X86_AND_MEMBASE_IMM:
1946                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1947                         break;
1948                 case OP_X86_OR_MEMBASE_IMM:
1949                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1950                         break;
1951                 case OP_X86_XOR_MEMBASE_IMM:
1952                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1953                         break;
1954                 case OP_X86_INC_MEMBASE:
1955                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
1956                         break;
1957                 case OP_X86_INC_REG:
1958                         x86_inc_reg (code, ins->dreg);
1959                         break;
1960                 case OP_X86_DEC_MEMBASE:
1961                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
1962                         break;
1963                 case OP_X86_DEC_REG:
1964                         x86_dec_reg (code, ins->dreg);
1965                         break;
1966                 case OP_X86_MUL_MEMBASE:
1967                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
1968                         break;
1969                 case CEE_BREAK:
1970                         x86_breakpoint (code);
1971                         break;
1972                 case OP_ADDCC:
1973                 case CEE_ADD:
1974                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
1975                         break;
1976                 case OP_ADC:
1977                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
1978                         break;
1979                 case OP_ADDCC_IMM:
1980                 case OP_ADD_IMM:
1981                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
1982                         break;
1983                 case OP_ADC_IMM:
1984                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
1985                         break;
1986                 case OP_SUBCC:
1987                 case CEE_SUB:
1988                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
1989                         break;
1990                 case OP_SBB:
1991                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
1992                         break;
1993                 case OP_SUBCC_IMM:
1994                 case OP_SUB_IMM:
1995                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
1996                         break;
1997                 case OP_SBB_IMM:
1998                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
1999                         break;
2000                 case CEE_AND:
2001                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2002                         break;
2003                 case OP_AND_IMM:
2004                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2005                         break;
2006                 case CEE_DIV:
2007                         x86_cdq (code);
2008                         x86_div_reg (code, ins->sreg2, TRUE);
2009                         break;
2010                 case CEE_DIV_UN:
2011                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2012                         x86_div_reg (code, ins->sreg2, FALSE);
2013                         break;
2014                 case OP_DIV_IMM:
2015                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2016                         x86_cdq (code);
2017                         x86_div_reg (code, ins->sreg2, TRUE);
2018                         break;
2019                 case CEE_REM:
2020                         x86_cdq (code);
2021                         x86_div_reg (code, ins->sreg2, TRUE);
2022                         break;
2023                 case CEE_REM_UN:
2024                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2025                         x86_div_reg (code, ins->sreg2, FALSE);
2026                         break;
2027                 case OP_REM_IMM:
2028                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2029                         x86_cdq (code);
2030                         x86_div_reg (code, ins->sreg2, TRUE);
2031                         break;
2032                 case CEE_OR:
2033                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2034                         break;
2035                 case OP_OR_IMM:
2036                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2037                         break;
2038                 case CEE_XOR:
2039                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2040                         break;
2041                 case OP_XOR_IMM:
2042                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2043                         break;
2044                 case CEE_SHL:
2045                         g_assert (ins->sreg2 == X86_ECX);
2046                         x86_shift_reg (code, X86_SHL, ins->dreg);
2047                         break;
2048                 case CEE_SHR:
2049                         g_assert (ins->sreg2 == X86_ECX);
2050                         x86_shift_reg (code, X86_SAR, ins->dreg);
2051                         break;
2052                 case OP_SHR_IMM:
2053                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2054                         break;
2055                 case OP_SHR_UN_IMM:
2056                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2057                         break;
2058                 case CEE_SHR_UN:
2059                         g_assert (ins->sreg2 == X86_ECX);
2060                         x86_shift_reg (code, X86_SHR, ins->dreg);
2061                         break;
2062                 case OP_SHL_IMM:
2063                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2064                         break;
2065                 case OP_LSHL: {
2066                         guint8 *jump_to_end;
2067
2068                         /* handle shifts below 32 bits */
2069                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2070                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2071
2072                         x86_test_reg_imm (code, X86_ECX, 32);
2073                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2074
2075                         /* handle shift over 32 bit */
2076                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2077                         x86_clear_reg (code, ins->sreg1);
2078                         
2079                         x86_patch (jump_to_end, code);
2080                         }
2081                         break;
2082                 case OP_LSHR: {
2083                         guint8 *jump_to_end;
2084
2085                         /* handle shifts below 32 bits */
2086                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2087                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2088
2089                         x86_test_reg_imm (code, X86_ECX, 32);
2090                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2091
2092                         /* handle shifts over 31 bits */
2093                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2094                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2095                         
2096                         x86_patch (jump_to_end, code);
2097                         }
2098                         break;
2099                 case OP_LSHR_UN: {
2100                         guint8 *jump_to_end;
2101
2102                         /* handle shifts below 32 bits */
2103                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2104                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2105
2106                         x86_test_reg_imm (code, X86_ECX, 32);
2107                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2108
2109                         /* handle shifts over 31 bits */
2110                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2111                         x86_clear_reg (code, ins->backend.reg3);
2112                         
2113                         x86_patch (jump_to_end, code);
2114                         }
2115                         break;
2116                 case OP_LSHL_IMM:
2117                         if (ins->inst_imm >= 32) {
2118                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2119                                 x86_clear_reg (code, ins->sreg1);
2120                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2121                         } else {
2122                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2123                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2124                         }
2125                         break;
2126                 case OP_LSHR_IMM:
2127                         if (ins->inst_imm >= 32) {
2128                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2129                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2130                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2131                         } else {
2132                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2133                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2134                         }
2135                         break;
2136                 case OP_LSHR_UN_IMM:
2137                         if (ins->inst_imm >= 32) {
2138                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2139                                 x86_clear_reg (code, ins->backend.reg3);
2140                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2141                         } else {
2142                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2143                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2144                         }
2145                         break;
2146                 case CEE_NOT:
2147                         x86_not_reg (code, ins->sreg1);
2148                         break;
2149                 case CEE_NEG:
2150                         x86_neg_reg (code, ins->sreg1);
2151                         break;
2152                 case OP_SEXT_I1:
2153                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2154                         break;
2155                 case OP_SEXT_I2:
2156                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2157                         break;
2158                 case CEE_MUL:
2159                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2160                         break;
2161                 case OP_MUL_IMM:
2162                         switch (ins->inst_imm) {
2163                         case 2:
2164                                 /* MOV r1, r2 */
2165                                 /* ADD r1, r1 */
2166                                 if (ins->dreg != ins->sreg1)
2167                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2168                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2169                                 break;
2170                         case 3:
2171                                 /* LEA r1, [r2 + r2*2] */
2172                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2173                                 break;
2174                         case 5:
2175                                 /* LEA r1, [r2 + r2*4] */
2176                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2177                                 break;
2178                         case 6:
2179                                 /* LEA r1, [r2 + r2*2] */
2180                                 /* ADD r1, r1          */
2181                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2182                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2183                                 break;
2184                         case 9:
2185                                 /* LEA r1, [r2 + r2*8] */
2186                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2187                                 break;
2188                         case 10:
2189                                 /* LEA r1, [r2 + r2*4] */
2190                                 /* ADD r1, r1          */
2191                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2192                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2193                                 break;
2194                         case 12:
2195                                 /* LEA r1, [r2 + r2*2] */
2196                                 /* SHL r1, 2           */
2197                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2198                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2199                                 break;
2200                         case 25:
2201                                 /* LEA r1, [r2 + r2*4] */
2202                                 /* LEA r1, [r1 + r1*4] */
2203                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2204                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2205                                 break;
2206                         case 100:
2207                                 /* LEA r1, [r2 + r2*4] */
2208                                 /* SHL r1, 2           */
2209                                 /* LEA r1, [r1 + r1*4] */
2210                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2211                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2212                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2213                                 break;
2214                         default:
2215                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2216                                 break;
2217                         }
2218                         break;
2219                 case CEE_MUL_OVF:
2220                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2221                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2222                         break;
2223                 case CEE_MUL_OVF_UN: {
2224                         /* the mul operation and the exception check should most likely be split */
2225                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2226                         /*g_assert (ins->sreg2 == X86_EAX);
2227                         g_assert (ins->dreg == X86_EAX);*/
2228                         if (ins->sreg2 == X86_EAX) {
2229                                 non_eax_reg = ins->sreg1;
2230                         } else if (ins->sreg1 == X86_EAX) {
2231                                 non_eax_reg = ins->sreg2;
2232                         } else {
2233                                 /* no need to save since we're going to store to it anyway */
2234                                 if (ins->dreg != X86_EAX) {
2235                                         saved_eax = TRUE;
2236                                         x86_push_reg (code, X86_EAX);
2237                                 }
2238                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2239                                 non_eax_reg = ins->sreg2;
2240                         }
2241                         if (ins->dreg == X86_EDX) {
2242                                 if (!saved_eax) {
2243                                         saved_eax = TRUE;
2244                                         x86_push_reg (code, X86_EAX);
2245                                 }
2246                         } else if (ins->dreg != X86_EAX) {
2247                                 saved_edx = TRUE;
2248                                 x86_push_reg (code, X86_EDX);
2249                         }
2250                         x86_mul_reg (code, non_eax_reg, FALSE);
2251                         /* save before the check since pop and mov don't change the flags */
2252                         if (ins->dreg != X86_EAX)
2253                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2254                         if (saved_edx)
2255                                 x86_pop_reg (code, X86_EDX);
2256                         if (saved_eax)
2257                                 x86_pop_reg (code, X86_EAX);
2258                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2259                         break;
2260                 }
2261                 case OP_ICONST:
2262                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2263                         break;
2264                 case OP_AOTCONST:
2265                         g_assert_not_reached ();
2266                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2267                         x86_mov_reg_imm (code, ins->dreg, 0);
2268                         break;
2269                 case OP_LOAD_GOTADDR:
2270                         x86_call_imm (code, 0);
2271                         /* 
2272                          * The patch needs to point to the pop, since the GOT offset needs 
2273                          * to be added to that address.
2274                          */
2275                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2276                         x86_pop_reg (code, ins->dreg);
2277                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2278                         break;
2279                 case OP_GOT_ENTRY:
2280                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2281                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2282                         break;
2283                 case OP_X86_PUSH_GOT_ENTRY:
2284                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2285                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2286                         break;
2287                 case CEE_CONV_I4:
2288                 case OP_MOVE:
2289                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2290                         break;
2291                 case CEE_CONV_U4:
2292                         g_assert_not_reached ();
2293                 case CEE_JMP: {
2294                         /*
2295                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2296                          * Keep in sync with the code in emit_epilog.
2297                          */
2298                         int pos = 0;
2299
2300                         /* FIXME: no tracing support... */
2301                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2302                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2303                         /* reset offset to make max_len work */
2304                         offset = code - cfg->native_code;
2305
2306                         g_assert (!cfg->method->save_lmf);
2307
2308                         code = emit_load_volatile_arguments (cfg, code);
2309
2310                         if (cfg->used_int_regs & (1 << X86_EBX))
2311                                 pos -= 4;
2312                         if (cfg->used_int_regs & (1 << X86_EDI))
2313                                 pos -= 4;
2314                         if (cfg->used_int_regs & (1 << X86_ESI))
2315                                 pos -= 4;
2316                         if (pos)
2317                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2318         
2319                         if (cfg->used_int_regs & (1 << X86_ESI))
2320                                 x86_pop_reg (code, X86_ESI);
2321                         if (cfg->used_int_regs & (1 << X86_EDI))
2322                                 x86_pop_reg (code, X86_EDI);
2323                         if (cfg->used_int_regs & (1 << X86_EBX))
2324                                 x86_pop_reg (code, X86_EBX);
2325         
2326                         /* restore ESP/EBP */
2327                         x86_leave (code);
2328                         offset = code - cfg->native_code;
2329                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2330                         x86_jump32 (code, 0);
2331                         break;
2332                 }
2333                 case OP_CHECK_THIS:
2334                         /* ensure ins->sreg1 is not NULL
2335                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2336                          * cmp DWORD PTR [eax], 0
2337                          */
2338                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2339                         break;
2340                 case OP_ARGLIST: {
2341                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2342                         x86_push_reg (code, hreg);
2343                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2344                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2345                         x86_pop_reg (code, hreg);
2346                         break;
2347                 }
2348                 case OP_FCALL:
2349                 case OP_LCALL:
2350                 case OP_VCALL:
2351                 case OP_VOIDCALL:
2352                 case CEE_CALL:
2353                         call = (MonoCallInst*)ins;
2354                         if (ins->flags & MONO_INST_HAS_METHOD)
2355                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2356                         else
2357                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2358                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2359                                 /* A pop is one byte, while an "add reg, imm" is three. So if there are 4 or 8
2360                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2361                                  * for P4 or i686 because gcc will avoid using pop/push at all), but we aren't
2362                                  * smart enough to do that optimization yet.
2363                                  *
2364                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2365                                  * the mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2366                                  * speed-up (most likely from locality benefits). People with other processors should
2367                                  * check on theirs to see what happens.
2368                                  */
2369                                 if (call->stack_usage == 4) {
2370                                         /* we want to use registers that won't get used soon, so use
2371                                          * ecx, as eax will get allocated first. edx is used by long calls,
2372                                          * so we can't use that.
2373                                          */
2374                                         
2375                                         x86_pop_reg (code, X86_ECX);
2376                                 } else {
2377                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2378                                 }
2379                         }
2380                         code = emit_move_return_value (cfg, ins, code);
2381                         break;
2382                 case OP_FCALL_REG:
2383                 case OP_LCALL_REG:
2384                 case OP_VCALL_REG:
2385                 case OP_VOIDCALL_REG:
2386                 case OP_CALL_REG:
2387                         call = (MonoCallInst*)ins;
2388                         x86_call_reg (code, ins->sreg1);
2389                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2390                                 if (call->stack_usage == 4)
2391                                         x86_pop_reg (code, X86_ECX);
2392                                 else
2393                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2394                         }
2395                         code = emit_move_return_value (cfg, ins, code);
2396                         break;
2397                 case OP_FCALL_MEMBASE:
2398                 case OP_LCALL_MEMBASE:
2399                 case OP_VCALL_MEMBASE:
2400                 case OP_VOIDCALL_MEMBASE:
2401                 case OP_CALL_MEMBASE:
2402                         call = (MonoCallInst*)ins;
2403                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2404                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2405                                 if (call->stack_usage == 4)
2406                                         x86_pop_reg (code, X86_ECX);
2407                                 else
2408                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2409                         }
2410                         code = emit_move_return_value (cfg, ins, code);
2411                         break;
2412                 case OP_OUTARG:
2413                 case OP_X86_PUSH:
2414                         x86_push_reg (code, ins->sreg1);
2415                         break;
2416                 case OP_X86_PUSH_IMM:
2417                         x86_push_imm (code, ins->inst_imm);
2418                         break;
2419                 case OP_X86_PUSH_MEMBASE:
2420                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2421                         break;
2422                 case OP_X86_PUSH_OBJ: 
2423                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2424                         x86_push_reg (code, X86_EDI);
2425                         x86_push_reg (code, X86_ESI);
2426                         x86_push_reg (code, X86_ECX);
2427                         if (ins->inst_offset)
2428                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2429                         else
2430                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2431                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2432                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2433                         x86_cld (code);
2434                         x86_prefix (code, X86_REP_PREFIX);
2435                         x86_movsd (code);
2436                         x86_pop_reg (code, X86_ECX);
2437                         x86_pop_reg (code, X86_ESI);
2438                         x86_pop_reg (code, X86_EDI);
2439                         break;
2440                 case OP_X86_LEA:
2441                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2442                         break;
2443                 case OP_X86_LEA_MEMBASE:
2444                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2445                         break;
2446                 case OP_X86_XCHG:
2447                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2448                         break;
2449                 case OP_LOCALLOC:
2450                         /* keep alignment */
2451                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2452                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2453                         code = mono_emit_stack_alloc (code, ins);
2454                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2455                         break;
2456                 case CEE_RET:
2457                         x86_ret (code);
2458                         break;
2459                 case CEE_THROW: {
2460                         x86_push_reg (code, ins->sreg1);
2461                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2462                                                           (gpointer)"mono_arch_throw_exception");
2463                         break;
2464                 }
2465                 case OP_RETHROW: {
2466                         x86_push_reg (code, ins->sreg1);
2467                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2468                                                           (gpointer)"mono_arch_rethrow_exception");
2469                         break;
2470                 }
2471                 case OP_CALL_HANDLER: 
2472                         /* Align stack */
2473 #ifdef __APPLE__
2474                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2475 #endif
2476                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2477                         x86_call_imm (code, 0);
2478 #ifdef __APPLE__
2479                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2480 #endif
2481                         break;
2482                 case OP_LABEL:
2483                         ins->inst_c0 = code - cfg->native_code;
2484                         break;
2485                 case CEE_BR:
2486                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2487                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2488                         //break;
2489                         if (ins->flags & MONO_INST_BRLABEL) {
2490                                 if (ins->inst_i0->inst_c0) {
2491                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2492                                 } else {
2493                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2494                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2495                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2496                                                 x86_jump8 (code, 0);
2497                                         else 
2498                                                 x86_jump32 (code, 0);
2499                                 }
2500                         } else {
2501                                 if (ins->inst_target_bb->native_offset) {
2502                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2503                                 } else {
2504                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2505                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2506                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2507                                                 x86_jump8 (code, 0);
2508                                         else 
2509                                                 x86_jump32 (code, 0);
2510                                 } 
2511                         }
2512                         break;
2513                 case OP_BR_REG:
2514                         x86_jump_reg (code, ins->sreg1);
2515                         break;
2516                 case OP_CEQ:
2517                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2518                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2519                         break;
2520                 case OP_CLT:
2521                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2522                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2523                         break;
2524                 case OP_CLT_UN:
2525                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2526                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2527                         break;
2528                 case OP_CGT:
2529                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2530                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2531                         break;
2532                 case OP_CGT_UN:
2533                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2534                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2535                         break;
2536                 case OP_CNE:
2537                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2538                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2539                         break;
2540                 case OP_COND_EXC_EQ:
2541                 case OP_COND_EXC_NE_UN:
2542                 case OP_COND_EXC_LT:
2543                 case OP_COND_EXC_LT_UN:
2544                 case OP_COND_EXC_GT:
2545                 case OP_COND_EXC_GT_UN:
2546                 case OP_COND_EXC_GE:
2547                 case OP_COND_EXC_GE_UN:
2548                 case OP_COND_EXC_LE:
2549                 case OP_COND_EXC_LE_UN:
2550                 case OP_COND_EXC_OV:
2551                 case OP_COND_EXC_NO:
2552                 case OP_COND_EXC_C:
2553                 case OP_COND_EXC_NC:
2554                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2555                         break;
2556                 case CEE_BEQ:
2557                 case CEE_BNE_UN:
2558                 case CEE_BLT:
2559                 case CEE_BLT_UN:
2560                 case CEE_BGT:
2561                 case CEE_BGT_UN:
2562                 case CEE_BGE:
2563                 case CEE_BGE_UN:
2564                 case CEE_BLE:
2565                 case CEE_BLE_UN:
2566                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2567                         break;
2568
2569                 /* floating point opcodes */
2570                 case OP_R8CONST: {
2571                         double d = *(double *)ins->inst_p0;
2572
2573                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2574                                 x86_fldz (code);
2575                         } else if (d == 1.0) {
2576                                 x86_fld1 (code);
2577                         } else {
2578                                 if (cfg->compile_aot) {
2579                                         guint32 *val = (guint32*)&d;
2580                                         x86_push_imm (code, val [1]);
2581                                         x86_push_imm (code, val [0]);
2582                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2583                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2584                                 }
2585                                 else {
2586                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2587                                         x86_fld (code, NULL, TRUE);
2588                                 }
2589                         }
2590                         break;
2591                 }
2592                 case OP_R4CONST: {
2593                         float f = *(float *)ins->inst_p0;
2594
2595                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2596                                 x86_fldz (code);
2597                         } else if (f == 1.0) {
2598                                 x86_fld1 (code);
2599                         } else {
2600                                 if (cfg->compile_aot) {
2601                                         guint32 val = *(guint32*)&f;
2602                                         x86_push_imm (code, val);
2603                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2604                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2605                                 }
2606                                 else {
2607                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2608                                         x86_fld (code, NULL, FALSE);
2609                                 }
2610                         }
2611                         break;
2612                 }
2613                 case OP_STORER8_MEMBASE_REG:
2614                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2615                         break;
2616                 case OP_LOADR8_SPILL_MEMBASE:
2617                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2618                         x86_fxch (code, 1);
2619                         break;
2620                 case OP_LOADR8_MEMBASE:
2621                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2622                         break;
2623                 case OP_STORER4_MEMBASE_REG:
2624                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2625                         break;
2626                 case OP_LOADR4_MEMBASE:
2627                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2628                         break;
2629                 case CEE_CONV_R4: /* FIXME: change precision */
2630                 case CEE_CONV_R8:
2631                         x86_push_reg (code, ins->sreg1);
2632                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2633                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2634                         break;
2635                 case OP_X86_FP_LOAD_I8:
2636                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2637                         break;
2638                 case OP_X86_FP_LOAD_I4:
2639                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2640                         break;
2641                 case OP_FCONV_TO_I1:
2642                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2643                         break;
2644                 case OP_FCONV_TO_U1:
2645                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2646                         break;
2647                 case OP_FCONV_TO_I2:
2648                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2649                         break;
2650                 case OP_FCONV_TO_U2:
2651                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2652                         break;
2653                 case OP_FCONV_TO_I4:
2654                 case OP_FCONV_TO_I:
2655                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2656                         break;
2657                 case OP_FCONV_TO_I8:
2658                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2659                         x86_fnstcw_membase(code, X86_ESP, 0);
2660                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2661                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2662                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2663                         x86_fldcw_membase (code, X86_ESP, 2);
2664                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2665                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2666                         x86_pop_reg (code, ins->dreg);
2667                         x86_pop_reg (code, ins->backend.reg3);
2668                         x86_fldcw_membase (code, X86_ESP, 0);
2669                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2670                         break;
2671                 case OP_LCONV_TO_R_UN: { 
2672                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2673                         guint8 *br;
2674
2675                         /* load 64bit integer to FP stack */
2676                         x86_push_imm (code, 0);
2677                         x86_push_reg (code, ins->sreg2);
2678                         x86_push_reg (code, ins->sreg1);
2679                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2680                         /* store as 80bit FP value */
2681                         x86_fst80_membase (code, X86_ESP, 0);
2682                         
2683                         /* test if lreg is negative */
2684                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2685                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2686         
2687                         /* add correction constant mn */
2688                         x86_fld80_mem (code, mn);
2689                         x86_fld80_membase (code, X86_ESP, 0);
2690                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2691                         x86_fst80_membase (code, X86_ESP, 0);
2692
2693                         x86_patch (br, code);
2694
2695                         x86_fld80_membase (code, X86_ESP, 0);
2696                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2697
2698                         break;
2699                 }
2700                 case OP_LCONV_TO_OVF_I: {
2701                         guint8 *br [3], *label [1];
2702                         MonoInst *tins;
2703
2704                         /* 
2705                          * Valid ints: 0xFFFFFFFF:0x80000000 to 0x00000000:0x7FFFFFFF
2706                          */
2707                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2708
2709                         /* If the low word top bit is set, see if we are negative */
2710                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2711                         /* We are not negative (no top bit set); check that our top word is zero */
2712                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2713                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2714                         label [0] = code;
2715
2716                         /* throw exception */
2717                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2718                         if (tins) {
2719                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2720                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2721                                         x86_jump8 (code, 0);
2722                                 else
2723                                         x86_jump32 (code, 0);
2724                         } else {
2725                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2726                                 x86_jump32 (code, 0);
2727                         }
2728         
2729         
2730                         x86_patch (br [0], code);
			/* our top bit is set, check that top word is 0xffffffff */
2732                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2733                 
2734                         x86_patch (br [1], code);
2735                         /* nope, emit exception */
2736                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2737                         x86_patch (br [2], label [0]);
2738
2739                         if (ins->dreg != ins->sreg1)
2740                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2741                         break;
2742                 }
2743                 case OP_FADD:
2744                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2745                         break;
2746                 case OP_FSUB:
2747                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2748                         break;          
2749                 case OP_FMUL:
2750                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2751                         break;          
2752                 case OP_FDIV:
2753                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2754                         break;          
2755                 case OP_FNEG:
2756                         x86_fchs (code);
2757                         break;          
2758                 case OP_SIN:
2759                         x86_fsin (code);
2760                         x86_fldz (code);
2761                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2762                         break;          
2763                 case OP_COS:
2764                         x86_fcos (code);
2765                         x86_fldz (code);
2766                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2767                         break;          
2768                 case OP_ABS:
2769                         x86_fabs (code);
2770                         break;          
2771                 case OP_TAN: {
2772                         /* 
2773                          * it really doesn't make sense to inline all this code,
2774                          * it's here just to show that things may not be as simple 
2775                          * as they appear.
2776                          */
2777                         guchar *check_pos, *end_tan, *pop_jump;
2778                         x86_push_reg (code, X86_EAX);
2779                         x86_fptan (code);
2780                         x86_fnstsw (code);
2781                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2782                         check_pos = code;
2783                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2784                         x86_fstp (code, 0); /* pop the 1.0 */
2785                         end_tan = code;
2786                         x86_jump8 (code, 0);
2787                         x86_fldpi (code);
2788                         x86_fp_op (code, X86_FADD, 0);
2789                         x86_fxch (code, 1);
2790                         x86_fprem1 (code);
2791                         x86_fstsw (code);
2792                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2793                         pop_jump = code;
2794                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2795                         x86_fstp (code, 1);
2796                         x86_fptan (code);
2797                         x86_patch (pop_jump, code);
2798                         x86_fstp (code, 0); /* pop the 1.0 */
2799                         x86_patch (check_pos, code);
2800                         x86_patch (end_tan, code);
2801                         x86_fldz (code);
2802                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2803                         x86_pop_reg (code, X86_EAX);
2804                         break;
2805                 }
2806                 case OP_ATAN:
2807                         x86_fld1 (code);
2808                         x86_fpatan (code);
2809                         x86_fldz (code);
2810                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2811                         break;          
2812                 case OP_SQRT:
2813                         x86_fsqrt (code);
2814                         break;          
2815                 case OP_X86_FPOP:
2816                         x86_fstp (code, 0);
2817                         break;          
2818                 case OP_FREM: {
2819                         guint8 *l1, *l2;
2820
2821                         x86_push_reg (code, X86_EAX);
2822                         /* we need to exchange ST(0) with ST(1) */
2823                         x86_fxch (code, 1);
2824
			/* this requires a loop, because fprem sometimes
2826                          * returns a partial remainder */
2827                         l1 = code;
2828                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2829                         /* x86_fprem1 (code); */
2830                         x86_fprem (code);
2831                         x86_fnstsw (code);
2832                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2833                         l2 = code + 2;
2834                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2835
2836                         /* pop result */
2837                         x86_fstp (code, 1);
2838
2839                         x86_pop_reg (code, X86_EAX);
2840                         break;
2841                 }
2842                 case OP_FCOMPARE:
2843                         if (cfg->opt & MONO_OPT_FCMOV) {
2844                                 x86_fcomip (code, 1);
2845                                 x86_fstp (code, 0);
2846                                 break;
2847                         }
2848                         /* this overwrites EAX */
2849                         EMIT_FPCOMPARE(code);
2850                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2851                         break;
2852                 case OP_FCEQ:
2853                         if (cfg->opt & MONO_OPT_FCMOV) {
2854                                 /* zeroing the register at the start results in 
2855                                  * shorter and faster code (we can also remove the widening op)
2856                                  */
2857                                 guchar *unordered_check;
2858                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2859                                 x86_fcomip (code, 1);
2860                                 x86_fstp (code, 0);
2861                                 unordered_check = code;
2862                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2863                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2864                                 x86_patch (unordered_check, code);
2865                                 break;
2866                         }
2867                         if (ins->dreg != X86_EAX) 
2868                                 x86_push_reg (code, X86_EAX);
2869
2870                         EMIT_FPCOMPARE(code);
2871                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2872                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2873                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2874                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2875
2876                         if (ins->dreg != X86_EAX) 
2877                                 x86_pop_reg (code, X86_EAX);
2878                         break;
2879                 case OP_FCLT:
2880                 case OP_FCLT_UN:
2881                         if (cfg->opt & MONO_OPT_FCMOV) {
2882                                 /* zeroing the register at the start results in 
2883                                  * shorter and faster code (we can also remove the widening op)
2884                                  */
2885                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2886                                 x86_fcomip (code, 1);
2887                                 x86_fstp (code, 0);
2888                                 if (ins->opcode == OP_FCLT_UN) {
2889                                         guchar *unordered_check = code;
2890                                         guchar *jump_to_end;
2891                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2892                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2893                                         jump_to_end = code;
2894                                         x86_jump8 (code, 0);
2895                                         x86_patch (unordered_check, code);
2896                                         x86_inc_reg (code, ins->dreg);
2897                                         x86_patch (jump_to_end, code);
2898                                 } else {
2899                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2900                                 }
2901                                 break;
2902                         }
2903                         if (ins->dreg != X86_EAX) 
2904                                 x86_push_reg (code, X86_EAX);
2905
2906                         EMIT_FPCOMPARE(code);
2907                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2908                         if (ins->opcode == OP_FCLT_UN) {
2909                                 guchar *is_not_zero_check, *end_jump;
2910                                 is_not_zero_check = code;
2911                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2912                                 end_jump = code;
2913                                 x86_jump8 (code, 0);
2914                                 x86_patch (is_not_zero_check, code);
2915                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2916
2917                                 x86_patch (end_jump, code);
2918                         }
2919                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2920                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2921
2922                         if (ins->dreg != X86_EAX) 
2923                                 x86_pop_reg (code, X86_EAX);
2924                         break;
2925                 case OP_FCGT:
2926                 case OP_FCGT_UN:
2927                         if (cfg->opt & MONO_OPT_FCMOV) {
2928                                 /* zeroing the register at the start results in 
2929                                  * shorter and faster code (we can also remove the widening op)
2930                                  */
2931                                 guchar *unordered_check;
2932                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2933                                 x86_fcomip (code, 1);
2934                                 x86_fstp (code, 0);
2935                                 if (ins->opcode == OP_FCGT) {
2936                                         unordered_check = code;
2937                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2938                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2939                                         x86_patch (unordered_check, code);
2940                                 } else {
2941                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2942                                 }
2943                                 break;
2944                         }
2945                         if (ins->dreg != X86_EAX) 
2946                                 x86_push_reg (code, X86_EAX);
2947
2948                         EMIT_FPCOMPARE(code);
2949                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2950                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2951                         if (ins->opcode == OP_FCGT_UN) {
2952                                 guchar *is_not_zero_check, *end_jump;
2953                                 is_not_zero_check = code;
2954                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2955                                 end_jump = code;
2956                                 x86_jump8 (code, 0);
2957                                 x86_patch (is_not_zero_check, code);
2958                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2959         
2960                                 x86_patch (end_jump, code);
2961                         }
2962                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2963                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2964
2965                         if (ins->dreg != X86_EAX) 
2966                                 x86_pop_reg (code, X86_EAX);
2967                         break;
2968                 case OP_FBEQ:
2969                         if (cfg->opt & MONO_OPT_FCMOV) {
2970                                 guchar *jump = code;
2971                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2972                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2973                                 x86_patch (jump, code);
2974                                 break;
2975                         }
2976                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2977                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2978                         break;
2979                 case OP_FBNE_UN:
2980                         /* Branch if C013 != 100 */
2981                         if (cfg->opt & MONO_OPT_FCMOV) {
2982                                 /* branch if !ZF or (PF|CF) */
2983                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2984                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2985                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2986                                 break;
2987                         }
2988                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2989                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2990                         break;
2991                 case OP_FBLT:
2992                         if (cfg->opt & MONO_OPT_FCMOV) {
2993                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2994                                 break;
2995                         }
2996                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2997                         break;
2998                 case OP_FBLT_UN:
2999                         if (cfg->opt & MONO_OPT_FCMOV) {
3000                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3001                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3002                                 break;
3003                         }
3004                         if (ins->opcode == OP_FBLT_UN) {
3005                                 guchar *is_not_zero_check, *end_jump;
3006                                 is_not_zero_check = code;
3007                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3008                                 end_jump = code;
3009                                 x86_jump8 (code, 0);
3010                                 x86_patch (is_not_zero_check, code);
3011                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3012
3013                                 x86_patch (end_jump, code);
3014                         }
3015                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3016                         break;
3017                 case OP_FBGT:
3018                 case OP_FBGT_UN:
3019                         if (cfg->opt & MONO_OPT_FCMOV) {
3020                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3021                                 break;
3022                         }
3023                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3024                         if (ins->opcode == OP_FBGT_UN) {
3025                                 guchar *is_not_zero_check, *end_jump;
3026                                 is_not_zero_check = code;
3027                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3028                                 end_jump = code;
3029                                 x86_jump8 (code, 0);
3030                                 x86_patch (is_not_zero_check, code);
3031                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3032
3033                                 x86_patch (end_jump, code);
3034                         }
3035                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3036                         break;
3037                 case OP_FBGE:
3038                         /* Branch if C013 == 100 or 001 */
3039                         if (cfg->opt & MONO_OPT_FCMOV) {
3040                                 guchar *br1;
3041
3042                                 /* skip branch if C1=1 */
3043                                 br1 = code;
3044                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3045                                 /* branch if (C0 | C3) = 1 */
3046                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3047                                 x86_patch (br1, code);
3048                                 break;
3049                         }
3050                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3051                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3052                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3053                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3054                         break;
3055                 case OP_FBGE_UN:
3056                         /* Branch if C013 == 000 */
3057                         if (cfg->opt & MONO_OPT_FCMOV) {
3058                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3059                                 break;
3060                         }
3061                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3062                         break;
3063                 case OP_FBLE:
3064                         /* Branch if C013=000 or 100 */
3065                         if (cfg->opt & MONO_OPT_FCMOV) {
3066                                 guchar *br1;
3067
3068                                 /* skip branch if C1=1 */
3069                                 br1 = code;
3070                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3071                                 /* branch if C0=0 */
3072                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3073                                 x86_patch (br1, code);
3074                                 break;
3075                         }
3076                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3077                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3078                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3079                         break;
3080                 case OP_FBLE_UN:
3081                         /* Branch if C013 != 001 */
3082                         if (cfg->opt & MONO_OPT_FCMOV) {
3083                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3084                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3085                                 break;
3086                         }
3087                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3088                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3089                         break;
3090                 case CEE_CKFINITE: {
3091                         x86_push_reg (code, X86_EAX);
3092                         x86_fxam (code);
3093                         x86_fnstsw (code);
3094                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3095                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3096                         x86_pop_reg (code, X86_EAX);
3097                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3098                         break;
3099                 }
3100                 case OP_TLS_GET: {
3101                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3102                         break;
3103                 }
3104                 case OP_MEMORY_BARRIER: {
3105                         /* Not needed on x86 */
3106                         break;
3107                 }
3108                 case OP_ATOMIC_ADD_I4: {
3109                         int dreg = ins->dreg;
3110
3111                         if (dreg == ins->inst_basereg) {
3112                                 x86_push_reg (code, ins->sreg2);
3113                                 dreg = ins->sreg2;
3114                         } 
3115                         
3116                         if (dreg != ins->sreg2)
3117                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3118
3119                         x86_prefix (code, X86_LOCK_PREFIX);
3120                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3121
3122                         if (dreg != ins->dreg) {
3123                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3124                                 x86_pop_reg (code, dreg);
3125                         }
3126
3127                         break;
3128                 }
3129                 case OP_ATOMIC_ADD_NEW_I4: {
3130                         int dreg = ins->dreg;
3131
3132                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3133                         if (ins->sreg2 == dreg) {
3134                                 if (dreg == X86_EBX) {
3135                                         dreg = X86_EDI;
3136                                         if (ins->inst_basereg == X86_EDI)
3137                                                 dreg = X86_ESI;
3138                                 } else {
3139                                         dreg = X86_EBX;
3140                                         if (ins->inst_basereg == X86_EBX)
3141                                                 dreg = X86_EDI;
3142                                 }
3143                         } else if (ins->inst_basereg == dreg) {
3144                                 if (dreg == X86_EBX) {
3145                                         dreg = X86_EDI;
3146                                         if (ins->sreg2 == X86_EDI)
3147                                                 dreg = X86_ESI;
3148                                 } else {
3149                                         dreg = X86_EBX;
3150                                         if (ins->sreg2 == X86_EBX)
3151                                                 dreg = X86_EDI;
3152                                 }
3153                         }
3154
3155                         if (dreg != ins->dreg) {
3156                                 x86_push_reg (code, dreg);
3157                         }
3158
3159                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3160                         x86_prefix (code, X86_LOCK_PREFIX);
3161                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3162                         /* dreg contains the old value, add with sreg2 value */
3163                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3164                         
3165                         if (ins->dreg != dreg) {
3166                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3167                                 x86_pop_reg (code, dreg);
3168                         }
3169
3170                         break;
3171                 }
3172                 case OP_ATOMIC_EXCHANGE_I4: {
3173                         guchar *br[2];
3174                         int sreg2 = ins->sreg2;
3175                         int breg = ins->inst_basereg;
3176
3177                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3178                          * hack to overcome limits in x86 reg allocator 
3179                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3180                          */
3181                         if (ins->dreg != X86_EAX)
3182                                 x86_push_reg (code, X86_EAX);
3183                         
3184                         /* We need the EAX reg for the cmpxchg */
3185                         if (ins->sreg2 == X86_EAX) {
3186                                 x86_push_reg (code, X86_EDX);
3187                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3188                                 sreg2 = X86_EDX;
3189                         }
3190
3191                         if (breg == X86_EAX) {
3192                                 x86_push_reg (code, X86_ESI);
3193                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3194                                 breg = X86_ESI;
3195                         }
3196
3197                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3198
3199                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3200                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3201                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3202                         x86_patch (br [1], br [0]);
3203
3204                         if (breg != ins->inst_basereg)
3205                                 x86_pop_reg (code, X86_ESI);
3206
3207                         if (ins->dreg != X86_EAX) {
3208                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3209                                 x86_pop_reg (code, X86_EAX);
3210                         }
3211
3212                         if (ins->sreg2 != sreg2)
3213                                 x86_pop_reg (code, X86_EDX);
3214
3215                         break;
3216                 }
3217                 default:
3218                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3219                         g_assert_not_reached ();
3220                 }
3221
3222                 if ((code - cfg->native_code - offset) > max_len) {
3223                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3224                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3225                         g_assert_not_reached ();
3226                 }
3227                
3228                 cpos += max_len;
3229
3230                 last_ins = ins;
3231                 last_offset = offset;
3232                 
3233                 ins = ins->next;
3234         }
3235
3236         cfg->code_len = code - cfg->native_code;
3237 }
3238
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Hook for registering architecture-specific low-level runtime calls.
 * The x86 backend currently has none to register, so this is a no-op.
 */
void
mono_arch_register_lowlevel_calls (void)
{
	/* Nothing to do on x86 */
}
3243
/*
 * mono_arch_patch_code:
 *
 *   Apply the list of jump-info patches JI to the native code starting at
 * CODE, resolving each patch target via mono_resolve_patch_target ().
 * METHOD and DOMAIN are passed through to the resolver.  RUN_CCTORS is
 * FALSE when compiling AOT; in that mode only basic-block and label
 * patches are applied here — everything else is left to be fixed up when
 * the AOT image is loaded.
 */
void
mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
{
	MonoJumpInfo *patch_info;
	gboolean compile_aot = !run_cctors;

	for (patch_info = ji; patch_info; patch_info = patch_info->next) {
		/* ip.i is a byte offset from the start of the native code */
		unsigned char *ip = patch_info->ip.i + code;
		const unsigned char *target;

		target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);

		if (compile_aot) {
			switch (patch_info->type) {
			case MONO_PATCH_INFO_BB:
			case MONO_PATCH_INFO_LABEL:
				break;
			default:
				/* No need to patch these */
				continue;
			}
		}

		switch (patch_info->type) {
		case MONO_PATCH_INFO_IP:
			/* Store the absolute address of the patch site itself */
			*((gconstpointer *)(ip)) = target;
			break;
		case MONO_PATCH_INFO_CLASS_INIT: {
			/* This local deliberately shadows the CODE parameter:
			 * the emitter macro advances its first argument, and we
			 * must not disturb the outer code base pointer. */
			guint8 *code = ip;
			/* Might already have been changed to a nop; re-emit the
			 * call so x86_patch () finds a call opcode at IP. */
			x86_call_code (code, 0);
			x86_patch (ip, target);
			break;
		}
		case MONO_PATCH_INFO_ABS:
		case MONO_PATCH_INFO_METHOD:
		case MONO_PATCH_INFO_METHOD_JUMP:
		case MONO_PATCH_INFO_INTERNAL_METHOD:
		case MONO_PATCH_INFO_BB:
		case MONO_PATCH_INFO_LABEL:
			/* Call/jump instruction: rewrite its displacement */
			x86_patch (ip, target);
			break;
		case MONO_PATCH_INFO_NONE:
			break;
		default: {
			/* Data patch: the pointer is embedded somewhere inside
			 * the instruction at IP; mono_arch_get_patch_offset ()
			 * returns the offset of the immediate to overwrite. */
			guint32 offset = mono_arch_get_patch_offset (ip);
			*((gconstpointer *)(ip + offset)) = target;
			break;
		}
		}
	}
}
3296
3297 guint8 *
3298 mono_arch_emit_prolog (MonoCompile *cfg)
3299 {
3300         MonoMethod *method = cfg->method;
3301         MonoBasicBlock *bb;
3302         MonoMethodSignature *sig;
3303         MonoInst *inst;
3304         int alloc_size, pos, max_offset, i;
3305         guint8 *code;
3306
3307         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3308         code = cfg->native_code = g_malloc (cfg->code_size);
3309
3310         x86_push_reg (code, X86_EBP);
3311         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3312
3313         alloc_size = cfg->stack_offset;
3314         pos = 0;
3315
3316         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3317                 /* Might need to attach the thread to the JIT */
3318                 if (lmf_tls_offset != -1) {
3319                         guint8 *buf;
3320
3321                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3322                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3323                         buf = code;
3324                         x86_branch8 (code, X86_CC_NE, 0, 0);
3325                         x86_push_imm (code, cfg->domain);
3326                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3327                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3328                         x86_patch (buf, code);
3329 #ifdef PLATFORM_WIN32
3330                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3331                         /* FIXME: Add a separate key for LMF to avoid this */
3332                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3333 #endif
3334                 } else {
3335                         g_assert (!cfg->compile_aot);
3336                         x86_push_imm (code, cfg->domain);
3337                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3338                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3339                 }
3340         }
3341
3342         if (method->save_lmf) {
3343                 pos += sizeof (MonoLMF);
3344
3345                 /* save the current IP */
3346                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3347                 x86_push_imm_template (code);
3348
3349                 /* save all caller saved regs */
3350                 x86_push_reg (code, X86_EBP);
3351                 x86_push_reg (code, X86_ESI);
3352                 x86_push_reg (code, X86_EDI);
3353                 x86_push_reg (code, X86_EBX);
3354
3355                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3356                         /*
3357                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3358                          * through the mono_lmf_addr TLS variable.
3359                          */
3360                         /* %eax = previous_lmf */
3361                         x86_prefix (code, X86_GS_PREFIX);
3362                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3363                         /* skip method_info + lmf */
3364                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3365                         /* push previous_lmf */
3366                         x86_push_reg (code, X86_EAX);
3367                         /* new lmf = ESP */
3368                         x86_prefix (code, X86_GS_PREFIX);
3369                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
3370                 } else {
3371                         /* get the address of lmf for the current thread */
3372                         /* 
3373                          * This is performance critical so we try to use some tricks to make
3374                          * it fast.
3375                          */                                                                        
3376
3377                         if (lmf_addr_tls_offset != -1) {
3378                                 /* Load lmf quicky using the GS register */
3379                                 code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
3380 #ifdef PLATFORM_WIN32
3381                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3382                                 /* FIXME: Add a separate key for LMF to avoid this */
3383                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3384 #endif
3385                         } else {
3386                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3387                         }
3388
3389                         /* Skip method info */
3390                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3391
3392                         /* push lmf */
3393                         x86_push_reg (code, X86_EAX); 
3394                         /* push *lfm (previous_lmf) */
3395                         x86_push_membase (code, X86_EAX, 0);
3396                         /* *(lmf) = ESP */
3397                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3398                 }
3399         } else {
3400
3401                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3402                         x86_push_reg (code, X86_EBX);
3403                         pos += 4;
3404                 }
3405
3406                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3407                         x86_push_reg (code, X86_EDI);
3408                         pos += 4;
3409                 }
3410
3411                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3412                         x86_push_reg (code, X86_ESI);
3413                         pos += 4;
3414                 }
3415         }
3416
3417         alloc_size -= pos;
3418
3419 #if __APPLE__
3420         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3421         {
3422                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3423                 if (tot & 4) {
3424                         tot += 4;
3425                         alloc_size += 4;
3426                 }
3427                 if (tot & 8) {
3428                         alloc_size += 8;
3429                 }
3430         }
3431 #endif
3432
3433         if (alloc_size) {
3434                 /* See mono_emit_stack_alloc */
3435 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3436                 guint32 remaining_size = alloc_size;
3437                 while (remaining_size >= 0x1000) {
3438                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3439                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3440                         remaining_size -= 0x1000;
3441                 }
3442                 if (remaining_size)
3443                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3444 #else
3445                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3446 #endif
3447         }
3448
3449 #if __APPLE_
3450         /* check the stack is aligned */
3451         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3452         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3453         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3454         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3455         x86_breakpoint (code);
3456 #endif
3457
3458         /* compute max_offset in order to use short forward jumps */
3459         max_offset = 0;
3460         if (cfg->opt & MONO_OPT_BRANCH) {
3461                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3462                         MonoInst *ins = bb->code;
3463                         bb->max_offset = max_offset;
3464
3465                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3466                                 max_offset += 6;
3467                         /* max alignment for loops */
3468                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3469                                 max_offset += LOOP_ALIGNMENT;
3470
3471                         while (ins) {
3472                                 if (ins->opcode == OP_LABEL)
3473                                         ins->inst_c1 = max_offset;
3474                                 
3475                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3476                                 ins = ins->next;
3477                         }
3478                 }
3479         }
3480
3481         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3482                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3483
3484         /* load arguments allocated to register from the stack */
3485         sig = mono_method_signature (method);
3486         pos = 0;
3487
3488         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3489                 inst = cfg->varinfo [pos];
3490                 if (inst->opcode == OP_REGVAR) {
3491                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3492                         if (cfg->verbose_level > 2)
3493                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3494                 }
3495                 pos++;
3496         }
3497
3498         cfg->code_len = code - cfg->native_code;
3499
3500         return code;
3501 }
3502
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the native epilog for CFG->METHOD: unlink/restore the saved LMF
 * or pop the used callee-saved registers, load a returned valuetype into
 * its registers/FP-stack slots if needed, tear down the frame with LEAVE
 * and return, popping the arguments when the calling convention (stdcall,
 * or a hidden vtype return address on the stack) requires it.
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	/* Worst-case size estimate, used to grow the code buffer up front */
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;
	
	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	/* Make sure the buffer can hold the whole epilog before emitting */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with CEE_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;
		/* The LMF was allocated at the top of the frame by the prolog */
		gint32 lmf_offset = -sizeof (MonoLMF);

		if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
			/*
			 * Optimized version which uses the mono_lmf TLS variable instead of indirection
			 * through the mono_lmf_addr TLS variable.
			 */
			/* reg = previous_lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* lmf = previous_lmf */
			x86_prefix (code, X86_GS_PREFIX);
			x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
		} else {
			/*
			 * Find a spare register: unlinking the LMF must not clobber
			 * the return value, which occupies EAX:EDX for 8-byte
			 * integers and EAX otherwise.
			 */
			switch (sig->ret->type) {
			case MONO_TYPE_I8:
			case MONO_TYPE_U8:
				prev_lmf_reg = X86_EDI;
				cfg->used_int_regs |= (1 << X86_EDI);
				break;
			default:
				prev_lmf_reg = X86_EDX;
				break;
			}

			/* reg = previous_lmf */
			x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* ecx = lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);

			/* *(lmf) = previous_lmf */
			x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
		}

		/* restore caller saved regs (saved into the LMF by the prolog) */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* Compute how far below EBP the prolog pushed the saved registers */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		/* Point ESP at the saved registers, discarding the rest of the frame */
		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		/* Pop in reverse order of the pushes in mono_arch_emit_prolog */
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		/* Each quad goes into its assigned integer register or FP-stack slot */
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	if (CALLCONV_IS_STDCALL (sig)) {
		/* stdcall: the callee pops its own arguments */
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		/* Pop the hidden valuetype return address pushed by the caller */
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	g_free (cinfo);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3654
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out-of-line code which throws the corlib exceptions recorded
 * as MONO_PATCH_INFO_EXC patches during code generation. Throw sequences
 * for the same exception class are shared: subsequent throw sites just
 * push their own IP offset and jump into the first emitted sequence.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	/* Cache of up to 16 already-emitted throw sequences, keyed by class */
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Redirect the branch at the throw site to the code emitted here */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse it: push this site's IP offset, jump into the shared code */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				size = 5 + 5;

				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					/* Emit a full 5-byte push now; its operand is backpatched below */
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				/* push the type token and call the generic corlib throw helper */
				x86_push_imm (code, exc_class->type_token);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				x86_call_code (code, 0);
				/* Backpatch the IP-offset push now that the call end is known */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3760
/*
 * mono_arch_flush_icache:
 *
 *   Flush the instruction cache for the given code range. On x86 the
 * instruction cache is kept coherent with memory writes by the hardware,
 * so no explicit flush is required.
 */
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* not needed */
}
3766
/*
 * mono_arch_flush_register_windows:
 *
 *   No-op: x86 has no register windows to flush.
 */
void
mono_arch_flush_register_windows (void)
{
}
3771
3772 /*
3773  * Support for fast access to the thread-local lmf structure using the GS
3774  * segment register on NPTL + kernel 2.6.x.
3775  */
3776
3777 static gboolean tls_offset_inited = FALSE;
3778
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Record the TLS offsets/keys used for fast inline access to the LMF,
 * the current appdomain and the current thread from generated code.
 * Setting MONO_NO_TLS in the environment disables all of these fast paths
 * (the offsets remain -1, so callers fall back to helper calls).
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
			/* 
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 */
			/* Note: tls_offset_inited is deliberately left FALSE here so we re-run */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();
			thread_tls_offset = mono_thread_get_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
			if (thread_tls_offset >= 64)
				thread_tls_offset = -1;
#else
#if MONO_XEN_OPT
			/* Detect Xen: code paths guarded by optimize_for_xen then avoid
			 * the GS-prefix TLS fast path (see emit_prolog/emit_epilog). */
			optimize_for_xen = access ("/proc/xen", F_OK) == 0;
#endif
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
			thread_tls_offset = mono_thread_get_tls_offset ();
#endif
		}
	}		
}
3813
/*
 * mono_arch_free_jit_tls_data:
 *
 *   Free arch-specific per-thread JIT data. Nothing is allocated by
 * mono_arch_setup_jit_tls_data on x86, so this is a no-op.
 */
void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
}
3818
/*
 * mono_arch_emit_this_vret_args:
 *
 *   Emit the IR which passes the 'this' pointer (in vreg THIS_REG, or -1
 * if absent) and the valuetype return address (in vreg VT_REG, or -1 if
 * absent) to the call INST, according to the calling convention computed
 * by get_call_info () for the call's signature.
 */
void
mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
{
	/* NOTE(review): cast is redundant — 'inst' is already a MonoCallInst* */
	MonoCallInst *call = (MonoCallInst*)inst;
	CallInfo *cinfo = get_call_info (inst->signature, FALSE);

	/* add the this argument */
	if (this_reg != -1) {
		if (cinfo->args [0].storage == ArgInIReg) {
			MonoInst *this;
			/* 'this' goes in a register: copy it and record the hard reg on the call */
			MONO_INST_NEW (cfg, this, OP_MOVE);
			this->type = this_type;
			this->sreg1 = this_reg;
			this->dreg = mono_regstate_next_int (cfg->rs);
			mono_bblock_add_inst (cfg->cbb, this);

			mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
		}
		else {
			/* 'this' is passed as an ordinary outgoing stack argument */
			MonoInst *this;
			MONO_INST_NEW (cfg, this, OP_OUTARG);
			this->type = this_type;
			this->sreg1 = this_reg;
			mono_bblock_add_inst (cfg->cbb, this);
		}
	}

	if (vt_reg != -1) {
		MonoInst *vtarg;

		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/*
			 * The valuetype is in EAX:EDX after the call, needs to be copied to
			 * the stack. Save the address here, so the call instruction can
			 * access it.
			 */
			MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
			vtarg->inst_destbasereg = X86_ESP;
			vtarg->inst_offset = inst->stack_usage;
			vtarg->sreg1 = vt_reg;
			mono_bblock_add_inst (cfg->cbb, vtarg);
		}
		else if (cinfo->ret.storage == ArgInIReg) {
			/* The return address is passed in a register */
			MONO_INST_NEW (cfg, vtarg, OP_MOVE);
			vtarg->sreg1 = vt_reg;
			vtarg->dreg = mono_regstate_next_int (cfg->rs);
			mono_bblock_add_inst (cfg->cbb, vtarg);

			mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
		} else {
			/* The vtype return address is pushed as a normal stack argument */
			MonoInst *vtarg;
			MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
			vtarg->type = STACK_MP;
			vtarg->sreg1 = vt_reg;
			mono_bblock_add_inst (cfg->cbb, vtarg);
		}
	}

	g_free (cinfo);
}
3880
3881 MonoInst*
3882 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3883 {
3884         MonoInst *ins = NULL;
3885
3886         if (cmethod->klass == mono_defaults.math_class) {
3887                 if (strcmp (cmethod->name, "Sin") == 0) {
3888                         MONO_INST_NEW (cfg, ins, OP_SIN);
3889                         ins->inst_i0 = args [0];
3890                 } else if (strcmp (cmethod->name, "Cos") == 0) {
3891                         MONO_INST_NEW (cfg, ins, OP_COS);
3892                         ins->inst_i0 = args [0];
3893                 } else if (strcmp (cmethod->name, "Tan") == 0) {
3894                         MONO_INST_NEW (cfg, ins, OP_TAN);
3895                         ins->inst_i0 = args [0];
3896                 } else if (strcmp (cmethod->name, "Atan") == 0) {
3897                         MONO_INST_NEW (cfg, ins, OP_ATAN);
3898                         ins->inst_i0 = args [0];
3899                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
3900                         MONO_INST_NEW (cfg, ins, OP_SQRT);
3901                         ins->inst_i0 = args [0];
3902                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
3903                         MONO_INST_NEW (cfg, ins, OP_ABS);
3904                         ins->inst_i0 = args [0];
3905                 }
3906 #if 0
3907                 /* OP_FREM is not IEEE compatible */
3908                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
3909                         MONO_INST_NEW (cfg, ins, OP_FREM);
3910                         ins->inst_i0 = args [0];
3911                         ins->inst_i1 = args [1];
3912                 }
3913 #endif
3914         } else if (cmethod->klass == mono_defaults.thread_class &&
3915                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
3916                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
3917         } else if(cmethod->klass->image == mono_defaults.corlib &&
3918                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
3919                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
3920
3921                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3922                         MonoInst *ins_iconst;
3923
3924                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3925                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3926                         ins_iconst->inst_c0 = 1;
3927
3928                         ins->inst_i0 = args [0];
3929                         ins->inst_i1 = ins_iconst;
3930                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3931                         MonoInst *ins_iconst;
3932
3933                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3934                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3935                         ins_iconst->inst_c0 = -1;
3936
3937                         ins->inst_i0 = args [0];
3938                         ins->inst_i1 = ins_iconst;
3939                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3940                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
3941
3942                         ins->inst_i0 = args [0];
3943                         ins->inst_i1 = args [1];
3944                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3945                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3946
3947                         ins->inst_i0 = args [0];
3948                         ins->inst_i1 = args [1];
3949                 }
3950         }
3951
3952         return ins;
3953 }
3954
3955
/*
 * mono_arch_print_tree:
 *
 *   Print arch-specific details of TREE, if any. The x86 backend has
 * none, so it always returns 0 (nothing printed).
 */
gboolean
mono_arch_print_tree (MonoInst *tree, int arity)
{
	return 0;
}
3961
3962 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3963 {
3964         MonoInst* ins;
3965         
3966         if (appdomain_tls_offset == -1)
3967                 return NULL;
3968
3969         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3970         ins->inst_offset = appdomain_tls_offset;
3971         return ins;
3972 }
3973
3974 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3975 {
3976         MonoInst* ins;
3977
3978         if (thread_tls_offset == -1)
3979                 return NULL;
3980
3981         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3982         ins->inst_offset = thread_tls_offset;
3983         return ins;
3984 }
3985
3986 guint32
3987 mono_arch_get_patch_offset (guint8 *code)
3988 {
3989         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3990                 return 2;
3991         else if ((code [0] == 0xba))
3992                 return 1;
3993         else if ((code [0] == 0x68))
3994                 /* push IMM */
3995                 return 1;
3996         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3997                 /* push <OFFSET>(<REG>) */
3998                 return 2;
3999         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4000                 /* call *<OFFSET>(<REG>) */
4001                 return 2;
4002         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4003                 /* fldl <ADDR> */
4004                 return 2;
4005         else if ((code [0] == 0x58) && (code [1] == 0x05))
4006                 /* pop %eax; add <OFFSET>, %eax */
4007                 return 2;
4008         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4009                 /* pop <REG>; add <OFFSET>, <REG> */
4010                 return 3;
4011         else {
4012                 g_assert_not_reached ();
4013                 return -1;
4014         }
4015 }
4016
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   Given CODE pointing just after an indirect call instruction, and REGS
 * holding the saved register values at that point, decode the call
 * sequence and return the address of the vtable/interface slot which was
 * called through, or NULL when the call is not slot-based (direct call).
 */
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;

	/* 
	 * A given byte sequence can match more than case here, so we have to be
	 * really careful about the ordering of the cases. Longer sequences
	 * come first.
	 */
	if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
		/*
		 * This is an interface call
		 * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
		 * ff 10                   call   *(%eax)
		 */
		reg = x86_modrm_rm (code [5]);
		disp = 0;
	} else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *imm8(<REG>): mod=1, opcode extension /2 */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *imm32(<REG>): mod=2, opcode extension /2 */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* direct call (0xe8 rel32): no slot involved */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	/* Slot address = saved value of the base register + displacement */
	return (gpointer*)(((gint32)(regs [reg])) + disp);
}
4069
4070 gpointer* 
4071 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
4072 {
4073         guint8 reg = 0;
4074         gint32 disp = 0;
4075
4076         code -= 7;
4077         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
4078                 reg = x86_modrm_rm (code [1]);
4079                 disp = code [4];
4080
4081                 if (reg == X86_EAX)
4082                         return NULL;
4083                 else
4084                         return (gpointer*)(((gint32)(regs [reg])) + disp);
4085         }
4086
4087         return NULL;
4088 }