2007-05-12 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #include <unistd.h>
15
16 #include <mono/metadata/appdomain.h>
17 #include <mono/metadata/debug-helpers.h>
18 #include <mono/metadata/threads.h>
19 #include <mono/metadata/profiler-private.h>
20 #include <mono/utils/mono-math.h>
21
22 #include "trace.h"
23 #include "mini-x86.h"
24 #include "inssel.h"
25 #include "cpu-x86.h"
26
27 /* On windows, these hold the key returned by TlsAlloc () */
28 static gint lmf_tls_offset = -1;
29 static gint lmf_addr_tls_offset = -1;
30 static gint appdomain_tls_offset = -1;
31 static gint thread_tls_offset = -1;
32
/*
 * optimize_for_xen is a real (TRUE) variable only in MONO_XEN_OPT builds;
 * everywhere else it is the compile time constant 0.
 */
#ifndef MONO_XEN_OPT
#define optimize_for_xen 0
#else
static gboolean optimize_for_xen = TRUE;
#endif
38
39 #ifdef PLATFORM_WIN32
40 static gboolean is_win32 = TRUE;
41 #else
42 static gboolean is_win32 = FALSE;
43 #endif
44
/*
 * ALIGN_TO:
 *
 *  Round VAL up to the next multiple of ALIGN. ALIGN must be a power of two.
 * The result has type guint64.
 * Both arguments are parenthesized and the mask is computed in 64 bits, so
 * compound expressions and 32 bit unsigned alignments (e.g. a 32 bit sizeof)
 * cannot narrow the computation or clear the upper half of VAL.
 */
#define ALIGN_TO(val,align) ((((guint64)(val)) + ((guint64)(align) - 1)) & ~((guint64)(align) - 1))
46
/* Added to an argument's CallInfo offset to address it relative to the frame pointer */
#define ARGS_OFFSET 8

/*
 * CALLCONV_IS_STDCALL:
 *
 *  Non-zero if SIG uses the stdcall calling convention.
 */
#ifndef PLATFORM_WIN32
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#else
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) \
	((((sig)->call_convention) == MONO_CALL_STDCALL) || \
	 ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#endif

/* Marks code paths which are not implemented: reaching one is an assertion failure */
#define NOT_IMPLEMENTED g_assert_not_reached ()
57
58 const char*
59 mono_arch_regname (int reg) {
60         switch (reg) {
61         case X86_EAX: return "%eax";
62         case X86_EBX: return "%ebx";
63         case X86_ECX: return "%ecx";
64         case X86_EDX: return "%edx";
65         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
66         case X86_EDI: return "%edi";
67         case X86_ESI: return "%esi";
68         }
69         return "unknown";
70 }
71
/*
 * mono_arch_fregname:
 *
 *  Return the printable name of the floating point register REG. Every
 * register maps to the same "unknown" placeholder.
 */
const char*
mono_arch_fregname (int reg)
{
	const char *placeholder = "unknown";

	(void) reg;

	return placeholder;
}
76
/* Where an argument or a return value lives during a call */
typedef enum {
	/* in the integer register named by ArgInfo.reg */
	ArgInIReg,
	/* single precision value in a float parameter register */
	ArgInFloatSSEReg,
	/* double precision value in a float parameter register */
	ArgInDoubleSSEReg,
	/* on the stack, at ArgInfo.offset */
	ArgOnStack,
	/* valuetype split up as described by ArgInfo.pair_storage/pair_regs */
	ArgValuetypeInReg,
	/* single precision value on the fp stack */
	ArgOnFloatFpStack,
	/* double precision value on the fp stack */
	ArgOnDoubleFpStack,
	/* nothing: no return value, or an unused pair_storage slot */
	ArgNone
} ArgStorage;
87
88 typedef struct {
89         gint16 offset;
90         gint8  reg;
91         ArgStorage storage;
92
93         /* Only if storage == ArgValuetypeInReg */
94         ArgStorage pair_storage [2];
95         gint8 pair_regs [2];
96 } ArgInfo;
97
98 typedef struct {
99         int nargs;
100         guint32 stack_usage;
101         guint32 reg_usage;
102         guint32 freg_usage;
103         gboolean need_stack_align;
104         guint32 stack_align_amount;
105         ArgInfo ret;
106         ArgInfo sig_cookie;
107         ArgInfo args [1];
108 } CallInfo;
109
110 #define PARAM_REGS 0
111
112 #define FLOAT_PARAM_REGS 0
113
114 static X86_Reg_No param_regs [] = { 0 };
115
116 #if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
117 #define SMALL_STRUCTS_IN_REGS
118 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
119 #endif
120
121 static void inline
122 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
123 {
124     ainfo->offset = *stack_size;
125
126     if (*gr >= PARAM_REGS) {
127                 ainfo->storage = ArgOnStack;
128                 (*stack_size) += sizeof (gpointer);
129     }
130     else {
131                 ainfo->storage = ArgInIReg;
132                 ainfo->reg = param_regs [*gr];
133                 (*gr) ++;
134     }
135 }
136
137 static void inline
138 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
139 {
140         ainfo->offset = *stack_size;
141
142         g_assert (PARAM_REGS == 0);
143         
144         ainfo->storage = ArgOnStack;
145         (*stack_size) += sizeof (gpointer) * 2;
146 }
147
148 static void inline
149 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
150 {
151     ainfo->offset = *stack_size;
152
153     if (*gr >= FLOAT_PARAM_REGS) {
154                 ainfo->storage = ArgOnStack;
155                 (*stack_size) += is_double ? 8 : 4;
156     }
157     else {
158                 /* A double register */
159                 if (is_double)
160                         ainfo->storage = ArgInDoubleSSEReg;
161                 else
162                         ainfo->storage = ArgInFloatSSEReg;
163                 ainfo->reg = *gr;
164                 (*gr) += 1;
165     }
166 }
167
168
169 static void
170 add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
171                gboolean is_return,
172                guint32 *gr, guint32 *fr, guint32 *stack_size)
173 {
174         guint32 size;
175         MonoClass *klass;
176
177         klass = mono_class_from_mono_type (type);
178         if (sig->pinvoke) 
179                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
180         else 
181                 size = mono_type_stack_size (&klass->byval_arg, NULL);
182
183 #ifdef SMALL_STRUCTS_IN_REGS
184         if (sig->pinvoke && is_return) {
185                 MonoMarshalType *info;
186
187                 /*
188                  * the exact rules are not very well documented, the code below seems to work with the 
189                  * code generated by gcc 3.3.3 -mno-cygwin.
190                  */
191                 info = mono_marshal_load_type_info (klass);
192                 g_assert (info);
193
194                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
195
196                 /* Special case structs with only a float member */
197                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
198                         ainfo->storage = ArgValuetypeInReg;
199                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
200                         return;
201                 }
202                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
203                         ainfo->storage = ArgValuetypeInReg;
204                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
205                         return;
206                 }               
207                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
208                         ainfo->storage = ArgValuetypeInReg;
209                         ainfo->pair_storage [0] = ArgInIReg;
210                         ainfo->pair_regs [0] = return_regs [0];
211                         if (info->native_size > 4) {
212                                 ainfo->pair_storage [1] = ArgInIReg;
213                                 ainfo->pair_regs [1] = return_regs [1];
214                         }
215                         return;
216                 }
217         }
218 #endif
219
220         ainfo->offset = *stack_size;
221         ainfo->storage = ArgOnStack;
222         *stack_size += ALIGN_TO (size, sizeof (gpointer));
223 }
224
225 /*
226  * get_call_info:
227  *
228  *  Obtain information about a call according to the calling convention.
229  * For x86 ELF, see the "System V Application Binary Interface Intel386 
230  * Architecture Processor Supplment, Fourth Edition" document for more
231  * information.
232  * For x86 win32, see ???.
233  */
234 static CallInfo*
235 get_call_info (MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
236 {
237         guint32 i, gr, fr;
238         MonoType *ret_type;
239         int n = sig->hasthis + sig->param_count;
240         guint32 stack_size = 0;
241         CallInfo *cinfo;
242
243         if (mp)
244                 cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
245         else
246                 cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
247
248         gr = 0;
249         fr = 0;
250
251         /* return value */
252         {
253                 ret_type = mono_type_get_underlying_type (sig->ret);
254                 switch (ret_type->type) {
255                 case MONO_TYPE_BOOLEAN:
256                 case MONO_TYPE_I1:
257                 case MONO_TYPE_U1:
258                 case MONO_TYPE_I2:
259                 case MONO_TYPE_U2:
260                 case MONO_TYPE_CHAR:
261                 case MONO_TYPE_I4:
262                 case MONO_TYPE_U4:
263                 case MONO_TYPE_I:
264                 case MONO_TYPE_U:
265                 case MONO_TYPE_PTR:
266                 case MONO_TYPE_FNPTR:
267                 case MONO_TYPE_CLASS:
268                 case MONO_TYPE_OBJECT:
269                 case MONO_TYPE_SZARRAY:
270                 case MONO_TYPE_ARRAY:
271                 case MONO_TYPE_STRING:
272                         cinfo->ret.storage = ArgInIReg;
273                         cinfo->ret.reg = X86_EAX;
274                         break;
275                 case MONO_TYPE_U8:
276                 case MONO_TYPE_I8:
277                         cinfo->ret.storage = ArgInIReg;
278                         cinfo->ret.reg = X86_EAX;
279                         break;
280                 case MONO_TYPE_R4:
281                         cinfo->ret.storage = ArgOnFloatFpStack;
282                         break;
283                 case MONO_TYPE_R8:
284                         cinfo->ret.storage = ArgOnDoubleFpStack;
285                         break;
286                 case MONO_TYPE_GENERICINST:
287                         if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
288                                 cinfo->ret.storage = ArgInIReg;
289                                 cinfo->ret.reg = X86_EAX;
290                                 break;
291                         }
292                         /* Fall through */
293                 case MONO_TYPE_VALUETYPE: {
294                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
295
296                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
297                         if (cinfo->ret.storage == ArgOnStack)
298                                 /* The caller passes the address where the value is stored */
299                                 add_general (&gr, &stack_size, &cinfo->ret);
300                         break;
301                 }
302                 case MONO_TYPE_TYPEDBYREF:
303                         /* Same as a valuetype with size 24 */
304                         add_general (&gr, &stack_size, &cinfo->ret);
305                         ;
306                         break;
307                 case MONO_TYPE_VOID:
308                         cinfo->ret.storage = ArgNone;
309                         break;
310                 default:
311                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
312                 }
313         }
314
315         /* this */
316         if (sig->hasthis)
317                 add_general (&gr, &stack_size, cinfo->args + 0);
318
319         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
320                 gr = PARAM_REGS;
321                 fr = FLOAT_PARAM_REGS;
322                 
323                 /* Emit the signature cookie just before the implicit arguments */
324                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
325         }
326
327         for (i = 0; i < sig->param_count; ++i) {
328                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
329                 MonoType *ptype;
330
331                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
332                         /* We allways pass the sig cookie on the stack for simplicity */
333                         /* 
334                          * Prevent implicit arguments + the sig cookie from being passed 
335                          * in registers.
336                          */
337                         gr = PARAM_REGS;
338                         fr = FLOAT_PARAM_REGS;
339
340                         /* Emit the signature cookie just before the implicit arguments */
341                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
342                 }
343
344                 if (sig->params [i]->byref) {
345                         add_general (&gr, &stack_size, ainfo);
346                         continue;
347                 }
348                 ptype = mono_type_get_underlying_type (sig->params [i]);
349                 switch (ptype->type) {
350                 case MONO_TYPE_BOOLEAN:
351                 case MONO_TYPE_I1:
352                 case MONO_TYPE_U1:
353                         add_general (&gr, &stack_size, ainfo);
354                         break;
355                 case MONO_TYPE_I2:
356                 case MONO_TYPE_U2:
357                 case MONO_TYPE_CHAR:
358                         add_general (&gr, &stack_size, ainfo);
359                         break;
360                 case MONO_TYPE_I4:
361                 case MONO_TYPE_U4:
362                         add_general (&gr, &stack_size, ainfo);
363                         break;
364                 case MONO_TYPE_I:
365                 case MONO_TYPE_U:
366                 case MONO_TYPE_PTR:
367                 case MONO_TYPE_FNPTR:
368                 case MONO_TYPE_CLASS:
369                 case MONO_TYPE_OBJECT:
370                 case MONO_TYPE_STRING:
371                 case MONO_TYPE_SZARRAY:
372                 case MONO_TYPE_ARRAY:
373                         add_general (&gr, &stack_size, ainfo);
374                         break;
375                 case MONO_TYPE_GENERICINST:
376                         if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
377                                 add_general (&gr, &stack_size, ainfo);
378                                 break;
379                         }
380                         /* Fall through */
381                 case MONO_TYPE_VALUETYPE:
382                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
383                         break;
384                 case MONO_TYPE_TYPEDBYREF:
385                         stack_size += sizeof (MonoTypedRef);
386                         ainfo->storage = ArgOnStack;
387                         break;
388                 case MONO_TYPE_U8:
389                 case MONO_TYPE_I8:
390                         add_general_pair (&gr, &stack_size, ainfo);
391                         break;
392                 case MONO_TYPE_R4:
393                         add_float (&fr, &stack_size, ainfo, FALSE);
394                         break;
395                 case MONO_TYPE_R8:
396                         add_float (&fr, &stack_size, ainfo, TRUE);
397                         break;
398                 default:
399                         g_error ("unexpected type 0x%x", ptype->type);
400                         g_assert_not_reached ();
401                 }
402         }
403
404         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
405                 gr = PARAM_REGS;
406                 fr = FLOAT_PARAM_REGS;
407                 
408                 /* Emit the signature cookie just before the implicit arguments */
409                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
410         }
411
412 #if defined(__APPLE__)
413         if ((stack_size % 16) != 0) { 
414                 cinfo->need_stack_align = TRUE;
415                 stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
416         }
417 #endif
418
419         cinfo->stack_usage = stack_size;
420         cinfo->reg_usage = gr;
421         cinfo->freg_usage = fr;
422         return cinfo;
423 }
424
425 /*
426  * mono_arch_get_argument_info:
427  * @csig:  a method signature
428  * @param_count: the number of parameters to consider
429  * @arg_info: an array to store the result infos
430  *
431  * Gathers information on parameters such as size, alignment and
432  * padding. arg_info should be large enought to hold param_count + 1 entries. 
433  *
434  * Returns the size of the activation frame.
435  */
436 int
437 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
438 {
439         int k, frame_size = 0;
440         int size, pad;
441         guint32 align;
442         int offset = 8;
443         CallInfo *cinfo;
444
445         cinfo = get_call_info (NULL, csig, FALSE);
446
447         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
448                 frame_size += sizeof (gpointer);
449                 offset += 4;
450         }
451
452         arg_info [0].offset = offset;
453
454         if (csig->hasthis) {
455                 frame_size += sizeof (gpointer);
456                 offset += 4;
457         }
458
459         arg_info [0].size = frame_size;
460
461         for (k = 0; k < param_count; k++) {
462                 
463                 if (csig->pinvoke)
464                         size = mono_type_native_stack_size (csig->params [k], &align);
465                 else {
466                         int ialign;
467                         size = mono_type_stack_size (csig->params [k], &ialign);
468                         align = ialign;
469                 }
470
471                 /* ignore alignment for now */
472                 align = 1;
473
474                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
475                 arg_info [k].pad = pad;
476                 frame_size += size;
477                 arg_info [k + 1].pad = 0;
478                 arg_info [k + 1].size = size;
479                 offset += pad;
480                 arg_info [k + 1].offset = offset;
481                 offset += size;
482         }
483
484         align = MONO_ARCH_FRAME_ALIGNMENT;
485         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
486         arg_info [k].pad = pad;
487
488         g_free (cinfo);
489
490         return frame_size;
491 }
492
493 static const guchar cpuid_impl [] = {
494         0x55,                           /* push   %ebp */
495         0x89, 0xe5,                     /* mov    %esp,%ebp */
496         0x53,                           /* push   %ebx */
497         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
498         0x0f, 0xa2,                     /* cpuid   */
499         0x50,                           /* push   %eax */
500         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
501         0x89, 0x18,                     /* mov    %ebx,(%eax) */
502         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
503         0x89, 0x08,                     /* mov    %ecx,(%eax) */
504         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
505         0x89, 0x10,                     /* mov    %edx,(%eax) */
506         0x58,                           /* pop    %eax */
507         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
508         0x89, 0x02,                     /* mov    %eax,(%edx) */
509         0x5b,                           /* pop    %ebx */
510         0xc9,                           /* leave   */
511         0xc3,                           /* ret     */
512 };
513
514 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
515
516 static int 
517 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
518 {
519         int have_cpuid = 0;
520 #ifndef _MSC_VER
521         __asm__  __volatile__ (
522                 "pushfl\n"
523                 "popl %%eax\n"
524                 "movl %%eax, %%edx\n"
525                 "xorl $0x200000, %%eax\n"
526                 "pushl %%eax\n"
527                 "popfl\n"
528                 "pushfl\n"
529                 "popl %%eax\n"
530                 "xorl %%edx, %%eax\n"
531                 "andl $0x200000, %%eax\n"
532                 "movl %%eax, %0"
533                 : "=r" (have_cpuid)
534                 :
535                 : "%eax", "%edx"
536         );
537 #else
538         __asm {
539                 pushfd
540                 pop eax
541                 mov edx, eax
542                 xor eax, 0x200000
543                 push eax
544                 popfd
545                 pushfd
546                 pop eax
547                 xor eax, edx
548                 and eax, 0x200000
549                 mov have_cpuid, eax
550         }
551 #endif
552         if (have_cpuid) {
553                 /* Have to use the code manager to get around WinXP DEP */
554                 static CpuidFunc func = NULL;
555                 void *ptr;
556                 if (!func) {
557                         ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
558                         memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
559                         func = (CpuidFunc)ptr;
560                 }
561                 func (id, p_eax, p_ebx, p_ecx, p_edx);
562
563                 /*
564                  * We use this approach because of issues with gcc and pic code, see:
565                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
566                 __asm__ __volatile__ ("cpuid"
567                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
568                         : "a" (id));
569                 */
570                 return 1;
571         }
572         return 0;
573 }
574
575 /*
576  * Initialize the cpu to execute managed code.
577  */
578 void
579 mono_arch_cpu_init (void)
580 {
581         /* spec compliance requires running with double precision */
582 #ifndef _MSC_VER
583         guint16 fpcw;
584
585         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
586         fpcw &= ~X86_FPCW_PRECC_MASK;
587         fpcw |= X86_FPCW_PREC_DOUBLE;
588         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
589         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
590 #else
591         _control87 (_PC_53, MCW_PC);
592 #endif
593 }
594
595 /*
596  * This function returns the optimizations supported on this cpu.
597  */
598 guint32
599 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
600 {
601         int eax, ebx, ecx, edx;
602         guint32 opts = 0;
603         
604         *exclude_mask = 0;
605         /* Feature Flags function, flags returned in EDX. */
606         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
607                 if (edx & (1 << 15)) {
608                         opts |= MONO_OPT_CMOV;
609                         if (edx & 1)
610                                 opts |= MONO_OPT_FCMOV;
611                         else
612                                 *exclude_mask |= MONO_OPT_FCMOV;
613                 } else
614                         *exclude_mask |= MONO_OPT_CMOV;
615         }
616         return opts;
617 }
618
619 /*
620  * Determine whenever the trap whose info is in SIGINFO is caused by
621  * integer overflow.
622  */
623 gboolean
624 mono_arch_is_int_overflow (void *sigctx, void *info)
625 {
626         MonoContext ctx;
627         guint8* ip;
628
629         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
630
631         ip = (guint8*)ctx.eip;
632
633         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
634                 gint32 reg;
635
636                 /* idiv REG */
637                 switch (x86_modrm_rm (ip [1])) {
638                 case X86_EAX:
639                         reg = ctx.eax;
640                         break;
641                 case X86_ECX:
642                         reg = ctx.ecx;
643                         break;
644                 case X86_EDX:
645                         reg = ctx.edx;
646                         break;
647                 case X86_EBX:
648                         reg = ctx.ebx;
649                         break;
650                 case X86_ESI:
651                         reg = ctx.esi;
652                         break;
653                 case X86_EDI:
654                         reg = ctx.edi;
655                         break;
656                 default:
657                         g_assert_not_reached ();
658                         reg = -1;
659                 }
660
661                 if (reg == -1)
662                         return TRUE;
663         }
664                         
665         return FALSE;
666 }
667
668 GList *
669 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
670 {
671         GList *vars = NULL;
672         int i;
673
674         for (i = 0; i < cfg->num_varinfo; i++) {
675                 MonoInst *ins = cfg->varinfo [i];
676                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
677
678                 /* unused vars */
679                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
680                         continue;
681
682                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
683                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
684                         continue;
685
686                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
687                  * 8bit quantities in caller saved registers on x86 */
688                 if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
689                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
690                         g_assert (i == vmv->idx);
691                         vars = g_list_prepend (vars, vmv);
692                 }
693         }
694
695         vars = mono_varlist_sort (cfg, vars, 0);
696
697         return vars;
698 }
699
700 GList *
701 mono_arch_get_global_int_regs (MonoCompile *cfg)
702 {
703         GList *regs = NULL;
704
705         /* we can use 3 registers for global allocation */
706         regs = g_list_prepend (regs, (gpointer)X86_EBX);
707         regs = g_list_prepend (regs, (gpointer)X86_ESI);
708         regs = g_list_prepend (regs, (gpointer)X86_EDI);
709
710         return regs;
711 }
712
713 /*
714  * mono_arch_regalloc_cost:
715  *
716  *  Return the cost, in number of memory references, of the action of 
717  * allocating the variable VMV into a register during global register
718  * allocation.
719  */
720 guint32
721 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
722 {
723         MonoInst *ins = cfg->varinfo [vmv->idx];
724
725         if (cfg->method->save_lmf)
726                 /* The register is already saved */
727                 return (ins->opcode == OP_ARG) ? 1 : 0;
728         else
729                 /* push+pop+possible load if it is an argument */
730                 return (ins->opcode == OP_ARG) ? 3 : 2;
731 }
732  
733 /*
734  * Set var information according to the calling convention. X86 version.
735  * The locals var stuff should most likely be split in another method.
736  */
737 void
738 mono_arch_allocate_vars (MonoCompile *cfg)
739 {
740         MonoMethodSignature *sig;
741         MonoMethodHeader *header;
742         MonoInst *inst;
743         guint32 locals_stack_size, locals_stack_align;
744         int i, offset;
745         gint32 *offsets;
746         CallInfo *cinfo;
747
748         header = mono_method_get_header (cfg->method);
749         sig = mono_method_signature (cfg->method);
750
751         cinfo = get_call_info (cfg->mempool, sig, FALSE);
752
753         cfg->frame_reg = MONO_ARCH_BASEREG;
754         offset = 0;
755
756         /* Reserve space to save LMF and caller saved registers */
757
758         if (cfg->method->save_lmf) {
759                 offset += sizeof (MonoLMF);
760         } else {
761                 if (cfg->used_int_regs & (1 << X86_EBX)) {
762                         offset += 4;
763                 }
764
765                 if (cfg->used_int_regs & (1 << X86_EDI)) {
766                         offset += 4;
767                 }
768
769                 if (cfg->used_int_regs & (1 << X86_ESI)) {
770                         offset += 4;
771                 }
772         }
773
774         switch (cinfo->ret.storage) {
775         case ArgValuetypeInReg:
776                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
777                 offset += 8;
778                 cfg->ret->opcode = OP_REGOFFSET;
779                 cfg->ret->inst_basereg = X86_EBP;
780                 cfg->ret->inst_offset = - offset;
781                 break;
782         default:
783                 break;
784         }
785
786         /* Allocate locals */
787         offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
788         if (locals_stack_align) {
789                 offset += (locals_stack_align - 1);
790                 offset &= ~(locals_stack_align - 1);
791         }
792         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
793                 if (offsets [i] != -1) {
794                         MonoInst *inst = cfg->varinfo [i];
795                         inst->opcode = OP_REGOFFSET;
796                         inst->inst_basereg = X86_EBP;
797                         inst->inst_offset = - (offset + offsets [i]);
798                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
799                 }
800         }
801         offset += locals_stack_size;
802
803
804         /*
805          * Allocate arguments+return value
806          */
807
808         switch (cinfo->ret.storage) {
809         case ArgOnStack:
810                 cfg->ret->opcode = OP_REGOFFSET;
811                 cfg->ret->inst_basereg = X86_EBP;
812                 cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
813                 break;
814         case ArgValuetypeInReg:
815                 break;
816         case ArgInIReg:
817                 cfg->ret->opcode = OP_REGVAR;
818                 cfg->ret->inst_c0 = cinfo->ret.reg;
819                 break;
820         case ArgNone:
821         case ArgOnFloatFpStack:
822         case ArgOnDoubleFpStack:
823                 break;
824         default:
825                 g_assert_not_reached ();
826         }
827
828         if (sig->call_convention == MONO_CALL_VARARG) {
829                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
830                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
831         }
832
833         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
834                 ArgInfo *ainfo = &cinfo->args [i];
835                 inst = cfg->args [i];
836                 if (inst->opcode != OP_REGVAR) {
837                         inst->opcode = OP_REGOFFSET;
838                         inst->inst_basereg = X86_EBP;
839                 }
840                 inst->inst_offset = ainfo->offset + ARGS_OFFSET;
841         }
842
843         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
844         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
845
846         cfg->stack_offset = offset;
847 }
848
849 void
850 mono_arch_create_vars (MonoCompile *cfg)
851 {
852         MonoMethodSignature *sig;
853         CallInfo *cinfo;
854
855         sig = mono_method_signature (cfg->method);
856
857         cinfo = get_call_info (cfg->mempool, sig, FALSE);
858
859         if (cinfo->ret.storage == ArgValuetypeInReg)
860                 cfg->ret_var_is_local = TRUE;
861 }
862
/*
 * FIXME: we need an alignment solution for enter_method and
 * mono_arch_call_opcode. Currently the alignment in mono_arch_call_opcode
 * is computed without using arch_get_argument_info.
 */
866
867 static void
868 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
869 {
870         MonoInst *arg;
871         MonoMethodSignature *tmp_sig;
872         MonoInst *sig_arg;
873
874         /* FIXME: Add support for signature tokens to AOT */
875         cfg->disable_aot = TRUE;
876         MONO_INST_NEW (cfg, arg, OP_OUTARG);
877
878         /*
879          * mono_ArgIterator_Setup assumes the signature cookie is 
880          * passed first and all the arguments which were before it are
881          * passed on the stack after the signature. So compensate by 
882          * passing a different signature.
883          */
884         tmp_sig = mono_metadata_signature_dup (call->signature);
885         tmp_sig->param_count -= call->signature->sentinelpos;
886         tmp_sig->sentinelpos = 0;
887         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
888
889         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
890         sig_arg->inst_p0 = tmp_sig;
891
892         arg->inst_left = sig_arg;
893         arg->type = STACK_PTR;
894         /* prepend, so they get reversed */
895         arg->next = call->out_args;
896         call->out_args = arg;
897 }
898
899 /* 
900  * take the arguments and generate the arch-specific
901  * instructions to properly call the function in call.
902  * This includes pushing, moving arguments to the right register
903  * etc.
904  */
905 MonoCallInst*
906 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
907         MonoInst *arg, *in;
908         MonoMethodSignature *sig;
909         int i, n;
910         CallInfo *cinfo;
911         int sentinelpos = 0;
912
913         sig = call->signature;
914         n = sig->param_count + sig->hasthis;
915
916         cinfo = get_call_info (cfg->mempool, sig, FALSE);
917
918         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
919                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
920
921         for (i = 0; i < n; ++i) {
922                 ArgInfo *ainfo = cinfo->args + i;
923
924                 /* Emit the signature cookie just before the implicit arguments */
925                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
926                         emit_sig_cookie (cfg, call);
927                 }
928
929                 if (is_virtual && i == 0) {
930                         /* the argument will be attached to the call instrucion */
931                         in = call->args [i];
932                 } else {
933                         MonoType *t;
934
935                         if (i >= sig->hasthis)
936                                 t = sig->params [i - sig->hasthis];
937                         else
938                                 t = &mono_defaults.int_class->byval_arg;
939                         t = mono_type_get_underlying_type (t);
940
941                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
942                         in = call->args [i];
943                         arg->cil_code = in->cil_code;
944                         arg->inst_left = in;
945                         arg->type = in->type;
946                         /* prepend, so they get reversed */
947                         arg->next = call->out_args;
948                         call->out_args = arg;
949
950                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
951                                 guint32 size, align;
952
953                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
954                                         size = sizeof (MonoTypedRef);
955                                         align = sizeof (gpointer);
956                                 }
957                                 else
958                                         if (sig->pinvoke)
959                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
960                                         else {
961                                                 int ialign;
962                                                 size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
963                                                 align = ialign;
964                                         }
965                                 arg->opcode = OP_OUTARG_VT;
966                                 arg->klass = in->klass;
967                                 arg->backend.is_pinvoke = sig->pinvoke;
968                                 arg->inst_imm = size; 
969                         }
970                         else {
971                                 switch (ainfo->storage) {
972                                 case ArgOnStack:
973                                         arg->opcode = OP_OUTARG;
974                                         if (!t->byref) {
975                                                 if (t->type == MONO_TYPE_R4)
976                                                         arg->opcode = OP_OUTARG_R4;
977                                                 else
978                                                         if (t->type == MONO_TYPE_R8)
979                                                                 arg->opcode = OP_OUTARG_R8;
980                                         }
981                                         break;
982                                 default:
983                                         g_assert_not_reached ();
984                                 }
985                         }
986                 }
987         }
988
989         /* Handle the case where there are no implicit arguments */
990         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
991                 emit_sig_cookie (cfg, call);
992         }
993
994         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
995                 if (cinfo->ret.storage == ArgValuetypeInReg) {
996                         MonoInst *zero_inst;
997                         /*
998                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
999                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1000                          * before calling the function. So we add a dummy instruction to represent pushing the 
1001                          * struct return address to the stack. The return address will be saved to this stack slot 
1002                          * by the code emitted in this_vret_args.
1003                          */
1004                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1005                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1006                         zero_inst->inst_p0 = 0;
1007                         arg->inst_left = zero_inst;
1008                         arg->type = STACK_PTR;
1009                         /* prepend, so they get reversed */
1010                         arg->next = call->out_args;
1011                         call->out_args = arg;
1012                 }
1013                 else
1014                         /* if the function returns a struct, the called method already does a ret $0x4 */
1015                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1016                                 cinfo->stack_usage -= 4;
1017         }
1018         
1019         call->stack_usage = cinfo->stack_usage;
1020
1021 #if defined(__APPLE__)
1022         if (cinfo->need_stack_align) {
1023                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1024                 arg->inst_c0 = cinfo->stack_align_amount;
1025                 arg->next = call->out_args;
1026                 call->out_args = arg;
1027         }
1028 #endif 
1029
1030         return call;
1031 }
1032
1033 /*
1034  * Allow tracing to work with this interface (with an optional argument)
1035  */
1036 void*
1037 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1038 {
1039         guchar *code = p;
1040
1041 #if __APPLE__
1042         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1043 #endif
1044
1045         /* if some args are passed in registers, we need to save them here */
1046         x86_push_reg (code, X86_EBP);
1047
1048         if (cfg->compile_aot) {
1049                 x86_push_imm (code, cfg->method);
1050                 x86_mov_reg_imm (code, X86_EAX, func);
1051                 x86_call_reg (code, X86_EAX);
1052         } else {
1053                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1054                 x86_push_imm (code, cfg->method);
1055                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1056                 x86_call_code (code, 0);
1057         }
1058 #if __APPLE__
1059         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
1060 #else
1061         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1062 #endif
1063
1064         return code;
1065 }
1066
/*
 * Ways in which mono_arch_instrument_epilog preserves the return value of
 * the method around the call to the tracing function.
 */
enum {
	SAVE_NONE    = 0,	/* nothing is saved */
	SAVE_STRUCT  = 1,	/* no register is saved; the dword at 8(%ebp) is passed when arguments are enabled */
	SAVE_EAX     = 2,	/* EAX is pushed before the call and popped after it */
	SAVE_EAX_EDX = 3,	/* the EDX:EAX pair is pushed before the call and popped after it */
	SAVE_FP      = 4	/* the top of the x87 stack is stored to the stack and reloaded */
};
1074
1075 void*
1076 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1077 {
1078         guchar *code = p;
1079         int arg_size = 0, save_mode = SAVE_NONE;
1080         MonoMethod *method = cfg->method;
1081         
1082         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1083         case MONO_TYPE_VOID:
1084                 /* special case string .ctor icall */
1085                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1086                         save_mode = SAVE_EAX;
1087                 else
1088                         save_mode = SAVE_NONE;
1089                 break;
1090         case MONO_TYPE_I8:
1091         case MONO_TYPE_U8:
1092                 save_mode = SAVE_EAX_EDX;
1093                 break;
1094         case MONO_TYPE_R4:
1095         case MONO_TYPE_R8:
1096                 save_mode = SAVE_FP;
1097                 break;
1098         case MONO_TYPE_GENERICINST:
1099                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1100                         save_mode = SAVE_EAX;
1101                         break;
1102                 }
1103                 /* Fall through */
1104         case MONO_TYPE_VALUETYPE:
1105                 save_mode = SAVE_STRUCT;
1106                 break;
1107         default:
1108                 save_mode = SAVE_EAX;
1109                 break;
1110         }
1111
1112         switch (save_mode) {
1113         case SAVE_EAX_EDX:
1114                 x86_push_reg (code, X86_EDX);
1115                 x86_push_reg (code, X86_EAX);
1116                 if (enable_arguments) {
1117                         x86_push_reg (code, X86_EDX);
1118                         x86_push_reg (code, X86_EAX);
1119                         arg_size = 8;
1120                 }
1121                 break;
1122         case SAVE_EAX:
1123                 x86_push_reg (code, X86_EAX);
1124                 if (enable_arguments) {
1125                         x86_push_reg (code, X86_EAX);
1126                         arg_size = 4;
1127                 }
1128                 break;
1129         case SAVE_FP:
1130                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1131                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1132                 if (enable_arguments) {
1133                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1134                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1135                         arg_size = 8;
1136                 }
1137                 break;
1138         case SAVE_STRUCT:
1139                 if (enable_arguments) {
1140                         x86_push_membase (code, X86_EBP, 8);
1141                         arg_size = 4;
1142                 }
1143                 break;
1144         case SAVE_NONE:
1145         default:
1146                 break;
1147         }
1148
1149         if (cfg->compile_aot) {
1150                 x86_push_imm (code, method);
1151                 x86_mov_reg_imm (code, X86_EAX, func);
1152                 x86_call_reg (code, X86_EAX);
1153         } else {
1154                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1155                 x86_push_imm (code, method);
1156                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1157                 x86_call_code (code, 0);
1158         }
1159         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1160
1161         switch (save_mode) {
1162         case SAVE_EAX_EDX:
1163                 x86_pop_reg (code, X86_EAX);
1164                 x86_pop_reg (code, X86_EDX);
1165                 break;
1166         case SAVE_EAX:
1167                 x86_pop_reg (code, X86_EAX);
1168                 break;
1169         case SAVE_FP:
1170                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1171                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1172                 break;
1173         case SAVE_NONE:
1174         default:
1175                 break;
1176         }
1177
1178         return code;
1179 }
1180
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch for INS using condition COND and signedness
 * SIGN. The expansion uses the variables `cfg', `code' and `cpos' of the
 * expansion site.
 *
 * If INS has MONO_INST_BRLABEL set, the target is the label in ins->inst_i0:
 * when its inst_c0 is non-zero the branch goes directly to that offset inside
 * cfg->native_code, otherwise a MONO_PATCH_INFO_LABEL patch is recorded.
 * Without the flag the target is ins->inst_true_bb: a non-zero native_offset
 * is branched to directly, otherwise a MONO_PATCH_INFO_BB patch is recorded.
 * A patched branch is emitted in its 8 bit form only if MONO_OPT_BRANCH is
 * enabled and the estimated distance from cpos fits into an imm8; the 32 bit
 * form is used otherwise.
 *
 * The expansion is a bare if/else statement, it is not wrapped into
 * do { } while (0).
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
	if (ins->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if (ins->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}
1205
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit a branch, taken when COND holds, which raises the exception named
 * EXC_NAME. If mono_branch_optimize_exception_target () returns a target
 * instruction, branch to it directly using EMIT_COND_BRANCH; otherwise emit a
 * 32 bit branch and record a MONO_PATCH_INFO_EXC patch for it. The expansion
 * uses the variables `cfg', `bb' and `code' of the expansion site.
 *
 * Note that the expansion already ends with a `;'.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, signed); \
		} else { \
			EMIT_COND_BRANCH (tins, cond, signed); \
		} \
	} while (0);
1221
/*
 * EMIT_FPCOMPARE:
 *
 *   Emit an x87 compare (x86_fcompp) followed by a store of the FPU status
 * word (x86_fnstsw) at CODE. Note that the expansion already ends with a `;'.
 */
#define EMIT_FPCOMPARE(code) \
	do { \
		x86_fcompp (code); \
		x86_fnstsw (code); \
	} while (0);
1226
1227
1228 static guint8*
1229 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1230 {
1231         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1232         x86_call_code (code, 0);
1233
1234         return code;
1235 }
1236
/*
 * INST_IGNORES_CFLAGS:
 *
 *   Evaluates to true if INS is one of the opcodes listed below. peephole_pass
 * uses it to decide whether a flag clobbering XOR may be placed in front of
 * the instruction.
 * FIXME: Add more instructions
 */
#define INST_IGNORES_CFLAGS(ins) \
	(((ins)->opcode == OP_BR) || \
	 ((ins)->opcode == OP_STORE_MEMBASE_IMM) || \
	 ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1239
1240 static void
1241 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1242 {
1243         MonoInst *ins, *last_ins = NULL;
1244         ins = bb->code;
1245
1246         while (ins) {
1247
1248                 switch (ins->opcode) {
1249                 case OP_ICONST:
1250                         /* reg = 0 -> XOR (reg, reg) */
1251                         /* XOR sets cflags on x86, so we cant do it always */
1252                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1253                                 ins->opcode = CEE_XOR;
1254                                 ins->sreg1 = ins->dreg;
1255                                 ins->sreg2 = ins->dreg;
1256                         }
1257                         break;
1258                 case OP_MUL_IMM: 
1259                         /* remove unnecessary multiplication with 1 */
1260                         if (ins->inst_imm == 1) {
1261                                 if (ins->dreg != ins->sreg1) {
1262                                         ins->opcode = OP_MOVE;
1263                                 } else {
1264                                         last_ins->next = ins->next;
1265                                         ins = ins->next;
1266                                         continue;
1267                                 }
1268                         }
1269                         break;
1270                 case OP_COMPARE_IMM:
1271                         /* OP_COMPARE_IMM (reg, 0) 
1272                          * --> 
1273                          * OP_X86_TEST_NULL (reg) 
1274                          */
1275                         if (!ins->inst_imm)
1276                                 ins->opcode = OP_X86_TEST_NULL;
1277                         break;
1278                 case OP_X86_COMPARE_MEMBASE_IMM:
1279                         /* 
1280                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1281                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1282                          * -->
1283                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1284                          * OP_COMPARE_IMM reg, imm
1285                          *
1286                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1287                          */
1288                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1289                             ins->inst_basereg == last_ins->inst_destbasereg &&
1290                             ins->inst_offset == last_ins->inst_offset) {
1291                                         ins->opcode = OP_COMPARE_IMM;
1292                                         ins->sreg1 = last_ins->sreg1;
1293
1294                                         /* check if we can remove cmp reg,0 with test null */
1295                                         if (!ins->inst_imm)
1296                                                 ins->opcode = OP_X86_TEST_NULL;
1297                                 }
1298
1299                         break;
1300                 case OP_LOAD_MEMBASE:
1301                 case OP_LOADI4_MEMBASE:
1302                         /* 
1303                          * Note: if reg1 = reg2 the load op is removed
1304                          *
1305                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1306                          * OP_LOAD_MEMBASE offset(basereg), reg2
1307                          * -->
1308                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1309                          * OP_MOVE reg1, reg2
1310                          */
1311                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1312                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1313                             ins->inst_basereg == last_ins->inst_destbasereg &&
1314                             ins->inst_offset == last_ins->inst_offset) {
1315                                 if (ins->dreg == last_ins->sreg1) {
1316                                         last_ins->next = ins->next;                             
1317                                         ins = ins->next;                                
1318                                         continue;
1319                                 } else {
1320                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1321                                         ins->opcode = OP_MOVE;
1322                                         ins->sreg1 = last_ins->sreg1;
1323                                 }
1324
1325                         /* 
1326                          * Note: reg1 must be different from the basereg in the second load
1327                          * Note: if reg1 = reg2 is equal then second load is removed
1328                          *
1329                          * OP_LOAD_MEMBASE offset(basereg), reg1
1330                          * OP_LOAD_MEMBASE offset(basereg), reg2
1331                          * -->
1332                          * OP_LOAD_MEMBASE offset(basereg), reg1
1333                          * OP_MOVE reg1, reg2
1334                          */
1335                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1336                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1337                               ins->inst_basereg != last_ins->dreg &&
1338                               ins->inst_basereg == last_ins->inst_basereg &&
1339                               ins->inst_offset == last_ins->inst_offset) {
1340
1341                                 if (ins->dreg == last_ins->dreg) {
1342                                         last_ins->next = ins->next;                             
1343                                         ins = ins->next;                                
1344                                         continue;
1345                                 } else {
1346                                         ins->opcode = OP_MOVE;
1347                                         ins->sreg1 = last_ins->dreg;
1348                                 }
1349
1350                                 //g_assert_not_reached ();
1351
1352 #if 0
1353                         /* 
1354                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1355                          * OP_LOAD_MEMBASE offset(basereg), reg
1356                          * -->
1357                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1358                          * OP_ICONST reg, imm
1359                          */
1360                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1361                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1362                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1363                                    ins->inst_offset == last_ins->inst_offset) {
1364                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1365                                 ins->opcode = OP_ICONST;
1366                                 ins->inst_c0 = last_ins->inst_imm;
1367                                 g_assert_not_reached (); // check this rule
1368 #endif
1369                         }
1370                         break;
1371                 case OP_LOADU1_MEMBASE:
1372                 case OP_LOADI1_MEMBASE:
1373                         /* 
1374                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1375                          * OP_LOAD_MEMBASE offset(basereg), reg2
1376                          * -->
1377                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1378                          * CONV_I2/U2 reg1, reg2
1379                          */
1380                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1381                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1382                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1383                                         ins->inst_offset == last_ins->inst_offset) {
1384                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1385                                 ins->sreg1 = last_ins->sreg1;
1386                         }
1387                         break;
1388                 case OP_LOADU2_MEMBASE:
1389                 case OP_LOADI2_MEMBASE:
1390                         /* 
1391                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1392                          * OP_LOAD_MEMBASE offset(basereg), reg2
1393                          * -->
1394                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1395                          * CONV_I2/U2 reg1, reg2
1396                          */
1397                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1398                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1399                                         ins->inst_offset == last_ins->inst_offset) {
1400                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1401                                 ins->sreg1 = last_ins->sreg1;
1402                         }
1403                         break;
1404                 case CEE_CONV_I4:
1405                 case CEE_CONV_U4:
1406                 case OP_MOVE:
1407                         /*
1408                          * Removes:
1409                          *
1410                          * OP_MOVE reg, reg 
1411                          */
1412                         if (ins->dreg == ins->sreg1) {
1413                                 if (last_ins)
1414                                         last_ins->next = ins->next;                             
1415                                 ins = ins->next;
1416                                 continue;
1417                         }
1418                         /* 
1419                          * Removes:
1420                          *
1421                          * OP_MOVE sreg, dreg 
1422                          * OP_MOVE dreg, sreg
1423                          */
1424                         if (last_ins && last_ins->opcode == OP_MOVE &&
1425                             ins->sreg1 == last_ins->dreg &&
1426                             ins->dreg == last_ins->sreg1) {
1427                                 last_ins->next = ins->next;                             
1428                                 ins = ins->next;                                
1429                                 continue;
1430                         }
1431                         break;
1432                         
1433                 case OP_X86_PUSH_MEMBASE:
1434                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1435                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1436                             ins->inst_basereg == last_ins->inst_destbasereg &&
1437                             ins->inst_offset == last_ins->inst_offset) {
1438                                     ins->opcode = OP_X86_PUSH;
1439                                     ins->sreg1 = last_ins->sreg1;
1440                         }
1441                         break;
1442                 }
1443                 last_ins = ins;
1444                 ins = ins->next;
1445         }
1446         bb->last_ins = last_ins;
1447 }
1448
1449 static const int 
1450 branch_cc_table [] = {
1451         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1452         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1453         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1454 };
1455
1456 /*#include "cprop.c"*/
1457 void
1458 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1459 {
1460         mono_local_regalloc (cfg, bb);
1461 }
1462
1463 static unsigned char*
1464 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1465 {
1466         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1467         x86_fnstcw_membase(code, X86_ESP, 0);
1468         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1469         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1470         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1471         x86_fldcw_membase (code, X86_ESP, 2);
1472         if (size == 8) {
1473                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1474                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1475                 x86_pop_reg (code, dreg);
1476                 /* FIXME: need the high register 
1477                  * x86_pop_reg (code, dreg_high);
1478                  */
1479         } else {
1480                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1481                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1482                 x86_pop_reg (code, dreg);
1483         }
1484         x86_fldcw_membase (code, X86_ESP, 0);
1485         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1486
1487         if (size == 1)
1488                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1489         else if (size == 2)
1490                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1491         return code;
1492 }
1493
1494 static unsigned char*
1495 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1496 {
1497         int sreg = tree->sreg1;
1498         int need_touch = FALSE;
1499
1500 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1501         need_touch = TRUE;
1502 #endif
1503
1504         if (need_touch) {
1505                 guint8* br[5];
1506
1507                 /*
1508                  * Under Windows:
1509                  * If requested stack size is larger than one page,
1510                  * perform stack-touch operation
1511                  */
1512                 /*
1513                  * Generate stack probe code.
1514                  * Under Windows, it is necessary to allocate one page at a time,
1515                  * "touching" stack after each successful sub-allocation. This is
1516                  * because of the way stack growth is implemented - there is a
1517                  * guard page before the lowest stack page that is currently commited.
1518                  * Stack normally grows sequentially so OS traps access to the
1519                  * guard page and commits more pages when needed.
1520                  */
1521                 x86_test_reg_imm (code, sreg, ~0xFFF);
1522                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1523
1524                 br[2] = code; /* loop */
1525                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1526                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1527
1528                 /* 
1529                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
1530                  * that follows only initializes the last part of the area.
1531                  */
1532                 /* Same as the init code below with size==0x1000 */
1533                 if (tree->flags & MONO_INST_INIT) {
1534                         x86_push_reg (code, X86_EAX);
1535                         x86_push_reg (code, X86_ECX);
1536                         x86_push_reg (code, X86_EDI);
1537                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
1538                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
1539                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
1540                         x86_cld (code);
1541                         x86_prefix (code, X86_REP_PREFIX);
1542                         x86_stosl (code);
1543                         x86_pop_reg (code, X86_EDI);
1544                         x86_pop_reg (code, X86_ECX);
1545                         x86_pop_reg (code, X86_EAX);
1546                 }
1547
1548                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1549                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1550                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1551                 x86_patch (br[3], br[2]);
1552                 x86_test_reg_reg (code, sreg, sreg);
1553                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1554                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1555
1556                 br[1] = code; x86_jump8 (code, 0);
1557
1558                 x86_patch (br[0], code);
1559                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1560                 x86_patch (br[1], code);
1561                 x86_patch (br[4], code);
1562         }
1563         else
1564                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1565
1566         if (tree->flags & MONO_INST_INIT) {
1567                 int offset = 0;
1568                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1569                         x86_push_reg (code, X86_EAX);
1570                         offset += 4;
1571                 }
1572                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1573                         x86_push_reg (code, X86_ECX);
1574                         offset += 4;
1575                 }
1576                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1577                         x86_push_reg (code, X86_EDI);
1578                         offset += 4;
1579                 }
1580                 
1581                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1582                 if (sreg != X86_ECX)
1583                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1584                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1585                                 
1586                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1587                 x86_cld (code);
1588                 x86_prefix (code, X86_REP_PREFIX);
1589                 x86_stosl (code);
1590                 
1591                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1592                         x86_pop_reg (code, X86_EDI);
1593                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1594                         x86_pop_reg (code, X86_ECX);
1595                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1596                         x86_pop_reg (code, X86_EAX);
1597         }
1598         return code;
1599 }
1600
1601
1602 static guint8*
1603 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1604 {
1605         CallInfo *cinfo;
1606         int quad;
1607
1608         /* Move return value to the target register */
1609         switch (ins->opcode) {
1610         case CEE_CALL:
1611         case OP_CALL_REG:
1612         case OP_CALL_MEMBASE:
1613                 if (ins->dreg != X86_EAX)
1614                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1615                 break;
1616         case OP_VCALL:
1617         case OP_VCALL_REG:
1618         case OP_VCALL_MEMBASE:
1619                 cinfo = get_call_info (cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
1620                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1621                         /* Pop the destination address from the stack */
1622                         x86_pop_reg (code, X86_ECX);
1623                         
1624                         for (quad = 0; quad < 2; quad ++) {
1625                                 switch (cinfo->ret.pair_storage [quad]) {
1626                                 case ArgInIReg:
1627                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1628                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1629                                         break;
1630                                 case ArgNone:
1631                                         break;
1632                                 default:
1633                                         g_assert_not_reached ();
1634                                 }
1635                         }
1636                 }
1637         default:
1638                 break;
1639         }
1640
1641         return code;
1642 }
1643
1644 /*
1645  * emit_tls_get:
1646  * @code: buffer to store code to
1647  * @dreg: hard register where to place the result
1648  * @tls_offset: offset info
1649  *
1650  * emit_tls_get emits in @code the native code that puts in the dreg register
1651  * the item in the thread local storage identified by tls_offset.
1652  *
1653  * Returns: a pointer to the end of the stored code
1654  */
1655 static guint8*
1656 emit_tls_get (guint8* code, int dreg, int tls_offset)
1657 {
1658 #ifdef PLATFORM_WIN32
1659         /* 
1660          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1661          * Journal and/or a disassembly of the TlsGet () function.
1662          */
1663         g_assert (tls_offset < 64);
1664         x86_prefix (code, X86_FS_PREFIX);
1665         x86_mov_reg_mem (code, dreg, 0x18, 4);
1666         /* Dunno what this does but TlsGetValue () contains it */
1667         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
1668         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
1669 #else
1670         if (optimize_for_xen) {
1671                 x86_prefix (code, X86_GS_PREFIX);
1672                 x86_mov_reg_mem (code, dreg, 0, 4);
1673                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
1674         } else {
1675                 x86_prefix (code, X86_GS_PREFIX);
1676                 x86_mov_reg_mem (code, dreg, tls_offset, 4);
1677         }
1678 #endif
1679         return code;
1680 }
1681
1682 /*
1683  * emit_load_volatile_arguments:
1684  *
1685  *  Load volatile arguments from the stack to the original input registers.
1686  * Required before a tail call.
1687  */
1688 static guint8*
1689 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
1690 {
1691         MonoMethod *method = cfg->method;
1692         MonoMethodSignature *sig;
1693         MonoInst *inst;
1694         CallInfo *cinfo;
1695         guint32 i;
1696
1697         /* FIXME: Generate intermediate code instead */
1698
1699         sig = mono_method_signature (method);
1700
1701         cinfo = get_call_info (cfg->mempool, sig, FALSE);
1702         
1703         /* This is the opposite of the code in emit_prolog */
1704
1705         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1706                 ArgInfo *ainfo = cinfo->args + i;
1707                 MonoType *arg_type;
1708                 inst = cfg->args [i];
1709
1710                 if (sig->hasthis && (i == 0))
1711                         arg_type = &mono_defaults.object_class->byval_arg;
1712                 else
1713                         arg_type = sig->params [i - sig->hasthis];
1714
1715                 /*
1716                  * On x86, the arguments are either in their original stack locations, or in
1717                  * global regs.
1718                  */
1719                 if (inst->opcode == OP_REGVAR) {
1720                         g_assert (ainfo->storage == ArgOnStack);
1721                         
1722                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
1723                 }
1724         }
1725
1726         return code;
1727 }
1728
/*
 * REAL_PRINT_REG:
 * @text: string literal, pasted in front of the " %d %p\n" format
 * @reg: hard register to print
 *
 * Debugging aid. Emits native code which calls printf () with the number
 * of @reg (the %d) and the run time contents of @reg (the %p). EAX, EDX
 * and ECX are pushed before the call and popped afterwards, and the three
 * printf () arguments are removed from the stack.
 *
 * The macro expands to a sequence of statements, not a single one, and
 * expects a `code' variable pointing to the output buffer to be in scope,
 * so it must not be used as the unbraced body of an if/else or a loop.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert ((reg) >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, (reg)); \
x86_push_imm (code, (reg)); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
1743
/*
 * Alignment, in bytes, for the start of a loop.
 * TODO: benchmark and set based on cpu.
 */
#define LOOP_ALIGNMENT 8

/* Non-zero when both the loop_body_start and the nesting fields of BB are non-zero */
#define bb_is_loop_start(bb) (((bb)->loop_body_start != 0) && ((bb)->nesting != 0))
1747
1748 void
1749 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1750 {
1751         MonoInst *ins;
1752         MonoCallInst *call;
1753         guint offset;
1754         guint8 *code = cfg->native_code + cfg->code_len;
1755         MonoInst *last_ins = NULL;
1756         guint last_offset = 0;
1757         int max_len, cpos;
1758
1759         if (cfg->opt & MONO_OPT_PEEPHOLE)
1760                 peephole_pass (cfg, bb);
1761
1762         if (cfg->opt & MONO_OPT_LOOP) {
1763                 int pad, align = LOOP_ALIGNMENT;
1764                 /* set alignment depending on cpu */
1765                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
1766                         pad = align - pad;
1767                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
1768                         x86_padding (code, pad);
1769                         cfg->code_len += pad;
1770                         bb->native_offset = cfg->code_len;
1771                 }
1772         }
1773
1774         if (cfg->verbose_level > 2)
1775                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1776
1777         cpos = bb->max_offset;
1778
1779         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1780                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1781                 g_assert (!cfg->compile_aot);
1782                 cpos += 6;
1783
1784                 cov->data [bb->dfn].cil_code = bb->cil_code;
1785                 /* this is not thread save, but good enough */
1786                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1787         }
1788
1789         offset = code - cfg->native_code;
1790
1791         mono_debug_open_block (cfg, bb, offset);
1792
1793         ins = bb->code;
1794         while (ins) {
1795                 offset = code - cfg->native_code;
1796
1797                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
1798
1799                 if (offset > (cfg->code_size - max_len - 16)) {
1800                         cfg->code_size *= 2;
1801                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1802                         code = cfg->native_code + offset;
1803                         mono_jit_stats.code_reallocs++;
1804                 }
1805
1806                 mono_debug_record_line_number (cfg, ins, offset);
1807
1808                 switch (ins->opcode) {
1809                 case OP_BIGMUL:
1810                         x86_mul_reg (code, ins->sreg2, TRUE);
1811                         break;
1812                 case OP_BIGMUL_UN:
1813                         x86_mul_reg (code, ins->sreg2, FALSE);
1814                         break;
1815                 case OP_X86_SETEQ_MEMBASE:
1816                 case OP_X86_SETNE_MEMBASE:
1817                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
1818                                          ins->inst_basereg, ins->inst_offset, TRUE);
1819                         break;
1820                 case OP_STOREI1_MEMBASE_IMM:
1821                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1822                         break;
1823                 case OP_STOREI2_MEMBASE_IMM:
1824                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1825                         break;
1826                 case OP_STORE_MEMBASE_IMM:
1827                 case OP_STOREI4_MEMBASE_IMM:
1828                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1829                         break;
1830                 case OP_STOREI1_MEMBASE_REG:
1831                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1832                         break;
1833                 case OP_STOREI2_MEMBASE_REG:
1834                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1835                         break;
1836                 case OP_STORE_MEMBASE_REG:
1837                 case OP_STOREI4_MEMBASE_REG:
1838                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1839                         break;
1840                 case CEE_LDIND_I:
1841                 case CEE_LDIND_I4:
1842                 case CEE_LDIND_U4:
1843                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1844                         break;
1845                 case OP_LOADU4_MEM:
1846                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1847                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1848                         break;
1849                 case OP_LOAD_MEMBASE:
1850                 case OP_LOADI4_MEMBASE:
1851                 case OP_LOADU4_MEMBASE:
1852                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1853                         break;
1854                 case OP_LOADU1_MEMBASE:
1855                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1856                         break;
1857                 case OP_LOADI1_MEMBASE:
1858                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1859                         break;
1860                 case OP_LOADU2_MEMBASE:
1861                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1862                         break;
1863                 case OP_LOADI2_MEMBASE:
1864                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1865                         break;
1866                 case CEE_CONV_I1:
1867                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1868                         break;
1869                 case CEE_CONV_I2:
1870                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1871                         break;
1872                 case CEE_CONV_U1:
1873                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1874                         break;
1875                 case CEE_CONV_U2:
1876                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1877                         break;
1878                 case OP_COMPARE:
1879                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1880                         break;
1881                 case OP_COMPARE_IMM:
1882                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1883                         break;
1884                 case OP_X86_COMPARE_MEMBASE_REG:
1885                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1886                         break;
1887                 case OP_X86_COMPARE_MEMBASE_IMM:
1888                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1889                         break;
1890                 case OP_X86_COMPARE_MEMBASE8_IMM:
1891                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1892                         break;
1893                 case OP_X86_COMPARE_REG_MEMBASE:
1894                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
1895                         break;
1896                 case OP_X86_COMPARE_MEM_IMM:
1897                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
1898                         break;
1899                 case OP_X86_TEST_NULL:
1900                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
1901                         break;
1902                 case OP_X86_ADD_MEMBASE_IMM:
1903                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1904                         break;
1905                 case OP_X86_ADD_MEMBASE:
1906                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
1907                         break;
1908                 case OP_X86_SUB_MEMBASE_IMM:
1909                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1910                         break;
1911                 case OP_X86_SUB_MEMBASE:
1912                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
1913                         break;
1914                 case OP_X86_AND_MEMBASE_IMM:
1915                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1916                         break;
1917                 case OP_X86_OR_MEMBASE_IMM:
1918                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1919                         break;
1920                 case OP_X86_XOR_MEMBASE_IMM:
1921                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1922                         break;
1923                 case OP_X86_INC_MEMBASE:
1924                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
1925                         break;
1926                 case OP_X86_INC_REG:
1927                         x86_inc_reg (code, ins->dreg);
1928                         break;
1929                 case OP_X86_DEC_MEMBASE:
1930                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
1931                         break;
1932                 case OP_X86_DEC_REG:
1933                         x86_dec_reg (code, ins->dreg);
1934                         break;
1935                 case OP_X86_MUL_MEMBASE:
1936                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
1937                         break;
1938                 case OP_BREAK:
1939                         x86_breakpoint (code);
1940                         break;
1941                 case OP_ADDCC:
1942                 case CEE_ADD:
1943                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
1944                         break;
1945                 case OP_ADC:
1946                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
1947                         break;
1948                 case OP_ADDCC_IMM:
1949                 case OP_ADD_IMM:
1950                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
1951                         break;
1952                 case OP_ADC_IMM:
1953                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
1954                         break;
1955                 case OP_SUBCC:
1956                 case CEE_SUB:
1957                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
1958                         break;
1959                 case OP_SBB:
1960                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
1961                         break;
1962                 case OP_SUBCC_IMM:
1963                 case OP_SUB_IMM:
1964                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
1965                         break;
1966                 case OP_SBB_IMM:
1967                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
1968                         break;
1969                 case CEE_AND:
1970                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
1971                         break;
1972                 case OP_AND_IMM:
1973                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
1974                         break;
1975                 case CEE_DIV:
1976                         x86_cdq (code);
1977                         x86_div_reg (code, ins->sreg2, TRUE);
1978                         break;
1979                 case CEE_DIV_UN:
1980                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1981                         x86_div_reg (code, ins->sreg2, FALSE);
1982                         break;
1983                 case OP_DIV_IMM:
1984                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1985                         x86_cdq (code);
1986                         x86_div_reg (code, ins->sreg2, TRUE);
1987                         break;
1988                 case CEE_REM:
1989                         x86_cdq (code);
1990                         x86_div_reg (code, ins->sreg2, TRUE);
1991                         break;
1992                 case CEE_REM_UN:
1993                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1994                         x86_div_reg (code, ins->sreg2, FALSE);
1995                         break;
1996                 case OP_REM_IMM:
1997                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1998                         x86_cdq (code);
1999                         x86_div_reg (code, ins->sreg2, TRUE);
2000                         break;
2001                 case CEE_OR:
2002                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2003                         break;
2004                 case OP_OR_IMM:
2005                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2006                         break;
2007                 case CEE_XOR:
2008                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2009                         break;
2010                 case OP_XOR_IMM:
2011                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2012                         break;
2013                 case CEE_SHL:
2014                         g_assert (ins->sreg2 == X86_ECX);
2015                         x86_shift_reg (code, X86_SHL, ins->dreg);
2016                         break;
2017                 case CEE_SHR:
2018                         g_assert (ins->sreg2 == X86_ECX);
2019                         x86_shift_reg (code, X86_SAR, ins->dreg);
2020                         break;
2021                 case OP_SHR_IMM:
2022                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2023                         break;
2024                 case OP_SHR_UN_IMM:
2025                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2026                         break;
2027                 case CEE_SHR_UN:
2028                         g_assert (ins->sreg2 == X86_ECX);
2029                         x86_shift_reg (code, X86_SHR, ins->dreg);
2030                         break;
2031                 case OP_SHL_IMM:
2032                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2033                         break;
2034                 case OP_LSHL: {
2035                         guint8 *jump_to_end;
2036
2037                         /* handle shifts below 32 bits */
2038                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2039                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2040
2041                         x86_test_reg_imm (code, X86_ECX, 32);
2042                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2043
2044                         /* handle shift over 32 bit */
2045                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2046                         x86_clear_reg (code, ins->sreg1);
2047                         
2048                         x86_patch (jump_to_end, code);
2049                         }
2050                         break;
2051                 case OP_LSHR: {
2052                         guint8 *jump_to_end;
2053
2054                         /* handle shifts below 32 bits */
2055                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2056                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2057
2058                         x86_test_reg_imm (code, X86_ECX, 32);
2059                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2060
2061                         /* handle shifts over 31 bits */
2062                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2063                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2064                         
2065                         x86_patch (jump_to_end, code);
2066                         }
2067                         break;
2068                 case OP_LSHR_UN: {
2069                         guint8 *jump_to_end;
2070
2071                         /* handle shifts below 32 bits */
2072                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2073                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2074
2075                         x86_test_reg_imm (code, X86_ECX, 32);
2076                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2077
2078                         /* handle shifts over 31 bits */
2079                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2080                         x86_clear_reg (code, ins->backend.reg3);
2081                         
2082                         x86_patch (jump_to_end, code);
2083                         }
2084                         break;
2085                 case OP_LSHL_IMM:
2086                         if (ins->inst_imm >= 32) {
2087                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2088                                 x86_clear_reg (code, ins->sreg1);
2089                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2090                         } else {
2091                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2092                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2093                         }
2094                         break;
2095                 case OP_LSHR_IMM:
2096                         if (ins->inst_imm >= 32) {
2097                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2098                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2099                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2100                         } else {
2101                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2102                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2103                         }
2104                         break;
2105                 case OP_LSHR_UN_IMM:
2106                         if (ins->inst_imm >= 32) {
2107                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2108                                 x86_clear_reg (code, ins->backend.reg3);
2109                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2110                         } else {
2111                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2112                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2113                         }
2114                         break;
2115                 case CEE_NOT:
2116                         x86_not_reg (code, ins->sreg1);
2117                         break;
2118                 case CEE_NEG:
2119                         x86_neg_reg (code, ins->sreg1);
2120                         break;
2121                 case OP_SEXT_I1:
2122                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2123                         break;
2124                 case OP_SEXT_I2:
2125                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2126                         break;
2127                 case CEE_MUL:
2128                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2129                         break;
2130                 case OP_MUL_IMM:
2131                         switch (ins->inst_imm) {
2132                         case 2:
2133                                 /* MOV r1, r2 */
2134                                 /* ADD r1, r1 */
2135                                 if (ins->dreg != ins->sreg1)
2136                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2137                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2138                                 break;
2139                         case 3:
2140                                 /* LEA r1, [r2 + r2*2] */
2141                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2142                                 break;
2143                         case 5:
2144                                 /* LEA r1, [r2 + r2*4] */
2145                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2146                                 break;
2147                         case 6:
2148                                 /* LEA r1, [r2 + r2*2] */
2149                                 /* ADD r1, r1          */
2150                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2151                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2152                                 break;
2153                         case 9:
2154                                 /* LEA r1, [r2 + r2*8] */
2155                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2156                                 break;
2157                         case 10:
2158                                 /* LEA r1, [r2 + r2*4] */
2159                                 /* ADD r1, r1          */
2160                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2161                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2162                                 break;
2163                         case 12:
2164                                 /* LEA r1, [r2 + r2*2] */
2165                                 /* SHL r1, 2           */
2166                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2167                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2168                                 break;
2169                         case 25:
2170                                 /* LEA r1, [r2 + r2*4] */
2171                                 /* LEA r1, [r1 + r1*4] */
2172                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2173                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2174                                 break;
2175                         case 100:
2176                                 /* LEA r1, [r2 + r2*4] */
2177                                 /* SHL r1, 2           */
2178                                 /* LEA r1, [r1 + r1*4] */
2179                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2180                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2181                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2182                                 break;
2183                         default:
2184                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2185                                 break;
2186                         }
2187                         break;
2188                 case CEE_MUL_OVF:
2189                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2190                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2191                         break;
2192                 case CEE_MUL_OVF_UN: {
2193                         /* the mul operation and the exception check should most likely be split */
2194                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2195                         /*g_assert (ins->sreg2 == X86_EAX);
2196                         g_assert (ins->dreg == X86_EAX);*/
2197                         if (ins->sreg2 == X86_EAX) {
2198                                 non_eax_reg = ins->sreg1;
2199                         } else if (ins->sreg1 == X86_EAX) {
2200                                 non_eax_reg = ins->sreg2;
2201                         } else {
2202                                 /* no need to save since we're going to store to it anyway */
2203                                 if (ins->dreg != X86_EAX) {
2204                                         saved_eax = TRUE;
2205                                         x86_push_reg (code, X86_EAX);
2206                                 }
2207                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2208                                 non_eax_reg = ins->sreg2;
2209                         }
2210                         if (ins->dreg == X86_EDX) {
2211                                 if (!saved_eax) {
2212                                         saved_eax = TRUE;
2213                                         x86_push_reg (code, X86_EAX);
2214                                 }
2215                         } else if (ins->dreg != X86_EAX) {
2216                                 saved_edx = TRUE;
2217                                 x86_push_reg (code, X86_EDX);
2218                         }
2219                         x86_mul_reg (code, non_eax_reg, FALSE);
2220                         /* save before the check since pop and mov don't change the flags */
2221                         if (ins->dreg != X86_EAX)
2222                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2223                         if (saved_edx)
2224                                 x86_pop_reg (code, X86_EDX);
2225                         if (saved_eax)
2226                                 x86_pop_reg (code, X86_EAX);
2227                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2228                         break;
2229                 }
2230                 case OP_ICONST:
2231                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2232                         break;
2233                 case OP_AOTCONST:
2234                         g_assert_not_reached ();
2235                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2236                         x86_mov_reg_imm (code, ins->dreg, 0);
2237                         break;
2238                 case OP_LOAD_GOTADDR:
2239                         x86_call_imm (code, 0);
2240                         /* 
2241                          * The patch needs to point to the pop, since the GOT offset needs 
2242                          * to be added to that address.
2243                          */
2244                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2245                         x86_pop_reg (code, ins->dreg);
2246                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2247                         break;
2248                 case OP_GOT_ENTRY:
2249                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2250                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2251                         break;
2252                 case OP_X86_PUSH_GOT_ENTRY:
2253                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2254                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2255                         break;
2256                 case CEE_CONV_I4:
2257                 case OP_MOVE:
2258                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2259                         break;
2260                 case CEE_CONV_U4:
2261                         g_assert_not_reached ();
2262                 case OP_JMP: {
2263                         /*
2264                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2265                          * Keep in sync with the code in emit_epilog.
2266                          */
2267                         int pos = 0;
2268
2269                         /* FIXME: no tracing support... */
2270                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2271                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2272                         /* reset offset to make max_len work */
2273                         offset = code - cfg->native_code;
2274
2275                         g_assert (!cfg->method->save_lmf);
2276
2277                         code = emit_load_volatile_arguments (cfg, code);
2278
2279                         if (cfg->used_int_regs & (1 << X86_EBX))
2280                                 pos -= 4;
2281                         if (cfg->used_int_regs & (1 << X86_EDI))
2282                                 pos -= 4;
2283                         if (cfg->used_int_regs & (1 << X86_ESI))
2284                                 pos -= 4;
2285                         if (pos)
2286                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2287         
2288                         if (cfg->used_int_regs & (1 << X86_ESI))
2289                                 x86_pop_reg (code, X86_ESI);
2290                         if (cfg->used_int_regs & (1 << X86_EDI))
2291                                 x86_pop_reg (code, X86_EDI);
2292                         if (cfg->used_int_regs & (1 << X86_EBX))
2293                                 x86_pop_reg (code, X86_EBX);
2294         
2295                         /* restore ESP/EBP */
2296                         x86_leave (code);
2297                         offset = code - cfg->native_code;
2298                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2299                         x86_jump32 (code, 0);
2300                         break;
2301                 }
2302                 case OP_CHECK_THIS:
2303                         /* ensure ins->sreg1 is not NULL
2304                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2305                          * cmp DWORD PTR [eax], 0
2306                          */
2307                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2308                         break;
2309                 case OP_ARGLIST: {
2310                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2311                         x86_push_reg (code, hreg);
2312                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2313                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2314                         x86_pop_reg (code, hreg);
2315                         break;
2316                 }
2317                 case OP_FCALL:
2318                 case OP_LCALL:
2319                 case OP_VCALL:
2320                 case OP_VOIDCALL:
2321                 case CEE_CALL:
2322                         call = (MonoCallInst*)ins;
2323                         if (ins->flags & MONO_INST_HAS_METHOD)
2324                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2325                         else
2326                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2327                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2328                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2329                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2330                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2331                                  * smart enough to do that optimization yet
2332                                  *
2333                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2334                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2335                                  * speedup (most likely from locality benefits). People with other processors should
2336                                  * check on theirs to see what happens.
2337                                  */
2338                                 if (call->stack_usage == 4) {
2339                                         /* we want to use registers that won't get used soon, so use
2340                                          * ecx, as eax will get allocated first. edx is used by long calls,
2341                                          * so we can't use that.
2342                                          */
2343                                         
2344                                         x86_pop_reg (code, X86_ECX);
2345                                 } else {
2346                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2347                                 }
2348                         }
2349                         code = emit_move_return_value (cfg, ins, code);
2350                         break;
2351                 case OP_FCALL_REG:
2352                 case OP_LCALL_REG:
2353                 case OP_VCALL_REG:
2354                 case OP_VOIDCALL_REG:
2355                 case OP_CALL_REG:
2356                         call = (MonoCallInst*)ins;
2357                         x86_call_reg (code, ins->sreg1);
2358                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2359                                 if (call->stack_usage == 4)
2360                                         x86_pop_reg (code, X86_ECX);
2361                                 else
2362                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2363                         }
2364                         code = emit_move_return_value (cfg, ins, code);
2365                         break;
2366                 case OP_FCALL_MEMBASE:
2367                 case OP_LCALL_MEMBASE:
2368                 case OP_VCALL_MEMBASE:
2369                 case OP_VOIDCALL_MEMBASE:
2370                 case OP_CALL_MEMBASE:
2371                         call = (MonoCallInst*)ins;
2372                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2373                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2374                                 if (call->stack_usage == 4)
2375                                         x86_pop_reg (code, X86_ECX);
2376                                 else
2377                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2378                         }
2379                         code = emit_move_return_value (cfg, ins, code);
2380                         break;
2381                 case OP_OUTARG:
2382                 case OP_X86_PUSH:
2383                         x86_push_reg (code, ins->sreg1);
2384                         break;
2385                 case OP_X86_PUSH_IMM:
2386                         x86_push_imm (code, ins->inst_imm);
2387                         break;
2388                 case OP_X86_PUSH_MEMBASE:
2389                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2390                         break;
2391                 case OP_X86_PUSH_OBJ: 
2392                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2393                         x86_push_reg (code, X86_EDI);
2394                         x86_push_reg (code, X86_ESI);
2395                         x86_push_reg (code, X86_ECX);
2396                         if (ins->inst_offset)
2397                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2398                         else
2399                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2400                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2401                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2402                         x86_cld (code);
2403                         x86_prefix (code, X86_REP_PREFIX);
2404                         x86_movsd (code);
2405                         x86_pop_reg (code, X86_ECX);
2406                         x86_pop_reg (code, X86_ESI);
2407                         x86_pop_reg (code, X86_EDI);
2408                         break;
2409                 case OP_X86_LEA:
2410                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2411                         break;
2412                 case OP_X86_LEA_MEMBASE:
2413                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2414                         break;
2415                 case OP_X86_XCHG:
2416                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2417                         break;
2418                 case OP_LOCALLOC:
2419                         /* keep alignment */
2420                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2421                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2422                         code = mono_emit_stack_alloc (code, ins);
2423                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2424                         break;
2425                 case CEE_RET:
2426                         x86_ret (code);
2427                         break;
2428                 case OP_THROW: {
2429                         x86_push_reg (code, ins->sreg1);
2430                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2431                                                           (gpointer)"mono_arch_throw_exception");
2432                         break;
2433                 }
2434                 case OP_RETHROW: {
2435                         x86_push_reg (code, ins->sreg1);
2436                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2437                                                           (gpointer)"mono_arch_rethrow_exception");
2438                         break;
2439                 }
2440                 case OP_CALL_HANDLER: 
2441                         /* Align stack */
2442 #ifdef __APPLE__
2443                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2444 #endif
2445                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2446                         x86_call_imm (code, 0);
2447 #ifdef __APPLE__
2448                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2449 #endif
2450                         break;
2451                 case OP_LABEL:
2452                         ins->inst_c0 = code - cfg->native_code;
2453                         break;
2454                 case OP_BR:
2455                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2456                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2457                         //break;
2458                         if (ins->flags & MONO_INST_BRLABEL) {
2459                                 if (ins->inst_i0->inst_c0) {
2460                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2461                                 } else {
2462                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2463                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2464                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2465                                                 x86_jump8 (code, 0);
2466                                         else 
2467                                                 x86_jump32 (code, 0);
2468                                 }
2469                         } else {
2470                                 if (ins->inst_target_bb->native_offset) {
2471                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2472                                 } else {
2473                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2474                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2475                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2476                                                 x86_jump8 (code, 0);
2477                                         else 
2478                                                 x86_jump32 (code, 0);
2479                                 } 
2480                         }
2481                         break;
2482                 case OP_BR_REG:
2483                         x86_jump_reg (code, ins->sreg1);
2484                         break;
2485                 case OP_CEQ:
2486                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2487                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2488                         break;
2489                 case OP_CLT:
2490                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2491                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2492                         break;
2493                 case OP_CLT_UN:
2494                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2495                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2496                         break;
2497                 case OP_CGT:
2498                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2499                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2500                         break;
2501                 case OP_CGT_UN:
2502                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2503                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2504                         break;
2505                 case OP_CNE:
2506                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2507                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2508                         break;
2509                 case OP_COND_EXC_EQ:
2510                 case OP_COND_EXC_NE_UN:
2511                 case OP_COND_EXC_LT:
2512                 case OP_COND_EXC_LT_UN:
2513                 case OP_COND_EXC_GT:
2514                 case OP_COND_EXC_GT_UN:
2515                 case OP_COND_EXC_GE:
2516                 case OP_COND_EXC_GE_UN:
2517                 case OP_COND_EXC_LE:
2518                 case OP_COND_EXC_LE_UN:
2519                 case OP_COND_EXC_OV:
2520                 case OP_COND_EXC_NO:
2521                 case OP_COND_EXC_C:
2522                 case OP_COND_EXC_NC:
2523                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2524                         break;
2525                 case CEE_BEQ:
2526                 case CEE_BNE_UN:
2527                 case CEE_BLT:
2528                 case CEE_BLT_UN:
2529                 case CEE_BGT:
2530                 case CEE_BGT_UN:
2531                 case CEE_BGE:
2532                 case CEE_BGE_UN:
2533                 case CEE_BLE:
2534                 case CEE_BLE_UN:
2535                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2536                         break;
2537
2538                 /* floating point opcodes */
2539                 case OP_R8CONST: {
2540                         double d = *(double *)ins->inst_p0;
2541
2542                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2543                                 x86_fldz (code);
2544                         } else if (d == 1.0) {
2545                                 x86_fld1 (code);
2546                         } else {
2547                                 if (cfg->compile_aot) {
2548                                         guint32 *val = (guint32*)&d;
2549                                         x86_push_imm (code, val [1]);
2550                                         x86_push_imm (code, val [0]);
2551                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2552                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2553                                 }
2554                                 else {
2555                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2556                                         x86_fld (code, NULL, TRUE);
2557                                 }
2558                         }
2559                         break;
2560                 }
2561                 case OP_R4CONST: {
2562                         float f = *(float *)ins->inst_p0;
2563
2564                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2565                                 x86_fldz (code);
2566                         } else if (f == 1.0) {
2567                                 x86_fld1 (code);
2568                         } else {
2569                                 if (cfg->compile_aot) {
2570                                         guint32 val = *(guint32*)&f;
2571                                         x86_push_imm (code, val);
2572                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2573                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2574                                 }
2575                                 else {
2576                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2577                                         x86_fld (code, NULL, FALSE);
2578                                 }
2579                         }
2580                         break;
2581                 }
2582                 case OP_STORER8_MEMBASE_REG:
2583                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2584                         break;
2585                 case OP_LOADR8_SPILL_MEMBASE:
2586                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2587                         x86_fxch (code, 1);
2588                         break;
2589                 case OP_LOADR8_MEMBASE:
2590                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2591                         break;
2592                 case OP_STORER4_MEMBASE_REG:
2593                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2594                         break;
2595                 case OP_LOADR4_MEMBASE:
2596                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2597                         break;
2598                 case CEE_CONV_R4: /* FIXME: change precision */
2599                 case CEE_CONV_R8:
2600                         x86_push_reg (code, ins->sreg1);
2601                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2602                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2603                         break;
2604                 case OP_X86_FP_LOAD_I8:
2605                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2606                         break;
2607                 case OP_X86_FP_LOAD_I4:
2608                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2609                         break;
2610                 case OP_FCONV_TO_I1:
2611                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2612                         break;
2613                 case OP_FCONV_TO_U1:
2614                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2615                         break;
2616                 case OP_FCONV_TO_I2:
2617                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2618                         break;
2619                 case OP_FCONV_TO_U2:
2620                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2621                         break;
2622                 case OP_FCONV_TO_I4:
2623                 case OP_FCONV_TO_I:
2624                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2625                         break;
2626                 case OP_FCONV_TO_I8:
2627                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2628                         x86_fnstcw_membase(code, X86_ESP, 0);
2629                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2630                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2631                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2632                         x86_fldcw_membase (code, X86_ESP, 2);
2633                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2634                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2635                         x86_pop_reg (code, ins->dreg);
2636                         x86_pop_reg (code, ins->backend.reg3);
2637                         x86_fldcw_membase (code, X86_ESP, 0);
2638                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2639                         break;
2640                 case OP_LCONV_TO_R_UN: { 
2641                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2642                         guint8 *br;
2643
2644                         /* load 64bit integer to FP stack */
2645                         x86_push_imm (code, 0);
2646                         x86_push_reg (code, ins->sreg2);
2647                         x86_push_reg (code, ins->sreg1);
2648                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2649                         /* store as 80bit FP value */
2650                         x86_fst80_membase (code, X86_ESP, 0);
2651                         
2652                         /* test if lreg is negative */
2653                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2654                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2655         
2656                         /* add correction constant mn */
2657                         x86_fld80_mem (code, mn);
2658                         x86_fld80_membase (code, X86_ESP, 0);
2659                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2660                         x86_fst80_membase (code, X86_ESP, 0);
2661
2662                         x86_patch (br, code);
2663
2664                         x86_fld80_membase (code, X86_ESP, 0);
2665                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2666
2667                         break;
2668                 }
2669                 case OP_LCONV_TO_OVF_I: {
2670                         guint8 *br [3], *label [1];
2671                         MonoInst *tins;
2672
2673                         /* 
2674                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2675                          */
2676                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2677
2678                         /* If the low word top bit is set, see if we are negative */
2679                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2680                         /* We are not negative (no top bit set), check for our top word to be zero */
2681                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2682                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2683                         label [0] = code;
2684
2685                         /* throw exception */
2686                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2687                         if (tins) {
2688                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2689                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2690                                         x86_jump8 (code, 0);
2691                                 else
2692                                         x86_jump32 (code, 0);
2693                         } else {
2694                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2695                                 x86_jump32 (code, 0);
2696                         }
2697         
2698         
2699                         x86_patch (br [0], code);
2700                         /* our top bit is set, check that top word is 0xffffffff */
2701                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2702                 
2703                         x86_patch (br [1], code);
2704                         /* nope, emit exception */
2705                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2706                         x86_patch (br [2], label [0]);
2707
2708                         if (ins->dreg != ins->sreg1)
2709                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2710                         break;
2711                 }
2712                 case OP_FADD:
2713                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2714                         break;
2715                 case OP_FSUB:
2716                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2717                         break;          
2718                 case OP_FMUL:
2719                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2720                         break;          
2721                 case OP_FDIV:
2722                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2723                         break;          
2724                 case OP_FNEG:
2725                         x86_fchs (code);
2726                         break;          
2727                 case OP_SIN:
2728                         x86_fsin (code);
2729                         x86_fldz (code);
2730                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2731                         break;          
2732                 case OP_COS:
2733                         x86_fcos (code);
2734                         x86_fldz (code);
2735                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2736                         break;          
2737                 case OP_ABS:
2738                         x86_fabs (code);
2739                         break;          
2740                 case OP_TAN: {
2741                         /* 
2742                          * it really doesn't make sense to inline all this code,
2743                          * it's here just to show that things may not be as simple 
2744                          * as they appear.
2745                          */
2746                         guchar *check_pos, *end_tan, *pop_jump;
2747                         x86_push_reg (code, X86_EAX);
2748                         x86_fptan (code);
2749                         x86_fnstsw (code);
2750                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2751                         check_pos = code;
2752                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2753                         x86_fstp (code, 0); /* pop the 1.0 */
2754                         end_tan = code;
2755                         x86_jump8 (code, 0);
2756                         x86_fldpi (code);
2757                         x86_fp_op (code, X86_FADD, 0);
2758                         x86_fxch (code, 1);
2759                         x86_fprem1 (code);
2760                         x86_fstsw (code);
2761                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2762                         pop_jump = code;
2763                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2764                         x86_fstp (code, 1);
2765                         x86_fptan (code);
2766                         x86_patch (pop_jump, code);
2767                         x86_fstp (code, 0); /* pop the 1.0 */
2768                         x86_patch (check_pos, code);
2769                         x86_patch (end_tan, code);
2770                         x86_fldz (code);
2771                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2772                         x86_pop_reg (code, X86_EAX);
2773                         break;
2774                 }
2775                 case OP_ATAN:
2776                         x86_fld1 (code);
2777                         x86_fpatan (code);
2778                         x86_fldz (code);
2779                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2780                         break;          
2781                 case OP_SQRT:
2782                         x86_fsqrt (code);
2783                         break;          
2784                 case OP_X86_FPOP:
2785                         x86_fstp (code, 0);
2786                         break;          
2787                 case OP_FREM: {
2788                         guint8 *l1, *l2;
2789
2790                         x86_push_reg (code, X86_EAX);
2791                         /* we need to exchange ST(0) with ST(1) */
2792                         x86_fxch (code, 1);
2793
2794                         /* this requires a loop, because fprem somtimes 
2795                          * returns a partial remainder */
2796                         l1 = code;
2797                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2798                         /* x86_fprem1 (code); */
2799                         x86_fprem (code);
2800                         x86_fnstsw (code);
2801                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2802                         l2 = code + 2;
2803                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2804
2805                         /* pop result */
2806                         x86_fstp (code, 1);
2807
2808                         x86_pop_reg (code, X86_EAX);
2809                         break;
2810                 }
2811                 case OP_FCOMPARE:
2812                         if (cfg->opt & MONO_OPT_FCMOV) {
2813                                 x86_fcomip (code, 1);
2814                                 x86_fstp (code, 0);
2815                                 break;
2816                         }
2817                         /* this overwrites EAX */
2818                         EMIT_FPCOMPARE(code);
2819                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2820                         break;
2821                 case OP_FCEQ:
2822                         if (cfg->opt & MONO_OPT_FCMOV) {
2823                                 /* zeroing the register at the start results in 
2824                                  * shorter and faster code (we can also remove the widening op)
2825                                  */
2826                                 guchar *unordered_check;
2827                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2828                                 x86_fcomip (code, 1);
2829                                 x86_fstp (code, 0);
2830                                 unordered_check = code;
2831                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2832                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2833                                 x86_patch (unordered_check, code);
2834                                 break;
2835                         }
2836                         if (ins->dreg != X86_EAX) 
2837                                 x86_push_reg (code, X86_EAX);
2838
2839                         EMIT_FPCOMPARE(code);
2840                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2841                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2842                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2843                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2844
2845                         if (ins->dreg != X86_EAX) 
2846                                 x86_pop_reg (code, X86_EAX);
2847                         break;
2848                 case OP_FCLT:
2849                 case OP_FCLT_UN:
2850                         if (cfg->opt & MONO_OPT_FCMOV) {
2851                                 /* zeroing the register at the start results in 
2852                                  * shorter and faster code (we can also remove the widening op)
2853                                  */
2854                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2855                                 x86_fcomip (code, 1);
2856                                 x86_fstp (code, 0);
2857                                 if (ins->opcode == OP_FCLT_UN) {
2858                                         guchar *unordered_check = code;
2859                                         guchar *jump_to_end;
2860                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2861                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2862                                         jump_to_end = code;
2863                                         x86_jump8 (code, 0);
2864                                         x86_patch (unordered_check, code);
2865                                         x86_inc_reg (code, ins->dreg);
2866                                         x86_patch (jump_to_end, code);
2867                                 } else {
2868                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2869                                 }
2870                                 break;
2871                         }
2872                         if (ins->dreg != X86_EAX) 
2873                                 x86_push_reg (code, X86_EAX);
2874
2875                         EMIT_FPCOMPARE(code);
2876                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2877                         if (ins->opcode == OP_FCLT_UN) {
2878                                 guchar *is_not_zero_check, *end_jump;
2879                                 is_not_zero_check = code;
2880                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2881                                 end_jump = code;
2882                                 x86_jump8 (code, 0);
2883                                 x86_patch (is_not_zero_check, code);
2884                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2885
2886                                 x86_patch (end_jump, code);
2887                         }
2888                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2889                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2890
2891                         if (ins->dreg != X86_EAX) 
2892                                 x86_pop_reg (code, X86_EAX);
2893                         break;
2894                 case OP_FCGT:
2895                 case OP_FCGT_UN:
2896                         if (cfg->opt & MONO_OPT_FCMOV) {
2897                                 /* zeroing the register at the start results in 
2898                                  * shorter and faster code (we can also remove the widening op)
2899                                  */
2900                                 guchar *unordered_check;
2901                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2902                                 x86_fcomip (code, 1);
2903                                 x86_fstp (code, 0);
2904                                 if (ins->opcode == OP_FCGT) {
2905                                         unordered_check = code;
2906                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2907                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2908                                         x86_patch (unordered_check, code);
2909                                 } else {
2910                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2911                                 }
2912                                 break;
2913                         }
2914                         if (ins->dreg != X86_EAX) 
2915                                 x86_push_reg (code, X86_EAX);
2916
2917                         EMIT_FPCOMPARE(code);
2918                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2919                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2920                         if (ins->opcode == OP_FCGT_UN) {
2921                                 guchar *is_not_zero_check, *end_jump;
2922                                 is_not_zero_check = code;
2923                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2924                                 end_jump = code;
2925                                 x86_jump8 (code, 0);
2926                                 x86_patch (is_not_zero_check, code);
2927                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2928         
2929                                 x86_patch (end_jump, code);
2930                         }
2931                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2932                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2933
2934                         if (ins->dreg != X86_EAX) 
2935                                 x86_pop_reg (code, X86_EAX);
2936                         break;
2937                 case OP_FBEQ:
2938                         if (cfg->opt & MONO_OPT_FCMOV) {
2939                                 guchar *jump = code;
2940                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2941                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2942                                 x86_patch (jump, code);
2943                                 break;
2944                         }
2945                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2946                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2947                         break;
2948                 case OP_FBNE_UN:
2949                         /* Branch if C013 != 100 */
2950                         if (cfg->opt & MONO_OPT_FCMOV) {
2951                                 /* branch if !ZF or (PF|CF) */
2952                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2953                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2954                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2955                                 break;
2956                         }
2957                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2958                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2959                         break;
2960                 case OP_FBLT:
2961                         if (cfg->opt & MONO_OPT_FCMOV) {
2962                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2963                                 break;
2964                         }
2965                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2966                         break;
2967                 case OP_FBLT_UN:
2968                         if (cfg->opt & MONO_OPT_FCMOV) {
2969                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2970                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2971                                 break;
2972                         }
2973                         if (ins->opcode == OP_FBLT_UN) {
2974                                 guchar *is_not_zero_check, *end_jump;
2975                                 is_not_zero_check = code;
2976                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2977                                 end_jump = code;
2978                                 x86_jump8 (code, 0);
2979                                 x86_patch (is_not_zero_check, code);
2980                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2981
2982                                 x86_patch (end_jump, code);
2983                         }
2984                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2985                         break;
2986                 case OP_FBGT:
2987                 case OP_FBGT_UN:
2988                         if (cfg->opt & MONO_OPT_FCMOV) {
2989                                 if (ins->opcode == OP_FBGT) {
2990                                         guchar *br1;
2991
2992                                         /* skip branch if C1=1 */
2993                                         br1 = code;
2994                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2995                                         /* branch if (C0 | C3) = 1 */
2996                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2997                                         x86_patch (br1, code);
2998                                 } else {
2999                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3000                                 }
3001                                 break;
3002                         }
3003                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3004                         if (ins->opcode == OP_FBGT_UN) {
3005                                 guchar *is_not_zero_check, *end_jump;
3006                                 is_not_zero_check = code;
3007                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3008                                 end_jump = code;
3009                                 x86_jump8 (code, 0);
3010                                 x86_patch (is_not_zero_check, code);
3011                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3012
3013                                 x86_patch (end_jump, code);
3014                         }
3015                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3016                         break;
3017                 case OP_FBGE:
3018                         /* Branch if C013 == 100 or 001 */
3019                         if (cfg->opt & MONO_OPT_FCMOV) {
3020                                 guchar *br1;
3021
3022                                 /* skip branch if C1=1 */
3023                                 br1 = code;
3024                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3025                                 /* branch if (C0 | C3) = 1 */
3026                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3027                                 x86_patch (br1, code);
3028                                 break;
3029                         }
3030                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3031                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3032                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3033                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3034                         break;
3035                 case OP_FBGE_UN:
3036                         /* Branch if C013 == 000 */
3037                         if (cfg->opt & MONO_OPT_FCMOV) {
3038                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3039                                 break;
3040                         }
3041                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3042                         break;
3043                 case OP_FBLE:
3044                         /* Branch if C013=000 or 100 */
3045                         if (cfg->opt & MONO_OPT_FCMOV) {
3046                                 guchar *br1;
3047
3048                                 /* skip branch if C1=1 */
3049                                 br1 = code;
3050                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3051                                 /* branch if C0=0 */
3052                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3053                                 x86_patch (br1, code);
3054                                 break;
3055                         }
3056                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3057                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3058                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3059                         break;
3060                 case OP_FBLE_UN:
3061                         /* Branch if C013 != 001 */
3062                         if (cfg->opt & MONO_OPT_FCMOV) {
3063                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3064                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3065                                 break;
3066                         }
3067                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3068                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3069                         break;
3070                 case OP_CKFINITE: {
3071                         x86_push_reg (code, X86_EAX);
3072                         x86_fxam (code);
3073                         x86_fnstsw (code);
3074                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3075                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3076                         x86_pop_reg (code, X86_EAX);
3077                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3078                         break;
3079                 }
3080                 case OP_TLS_GET: {
3081                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3082                         break;
3083                 }
3084                 case OP_MEMORY_BARRIER: {
3085                         /* Not needed on x86 */
3086                         break;
3087                 }
3088                 case OP_ATOMIC_ADD_I4: {
3089                         int dreg = ins->dreg;
3090
3091                         if (dreg == ins->inst_basereg) {
3092                                 x86_push_reg (code, ins->sreg2);
3093                                 dreg = ins->sreg2;
3094                         } 
3095                         
3096                         if (dreg != ins->sreg2)
3097                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3098
3099                         x86_prefix (code, X86_LOCK_PREFIX);
3100                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3101
3102                         if (dreg != ins->dreg) {
3103                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3104                                 x86_pop_reg (code, dreg);
3105                         }
3106
3107                         break;
3108                 }
3109                 case OP_ATOMIC_ADD_NEW_I4: {
3110                         int dreg = ins->dreg;
3111
3112                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3113                         if (ins->sreg2 == dreg) {
3114                                 if (dreg == X86_EBX) {
3115                                         dreg = X86_EDI;
3116                                         if (ins->inst_basereg == X86_EDI)
3117                                                 dreg = X86_ESI;
3118                                 } else {
3119                                         dreg = X86_EBX;
3120                                         if (ins->inst_basereg == X86_EBX)
3121                                                 dreg = X86_EDI;
3122                                 }
3123                         } else if (ins->inst_basereg == dreg) {
3124                                 if (dreg == X86_EBX) {
3125                                         dreg = X86_EDI;
3126                                         if (ins->sreg2 == X86_EDI)
3127                                                 dreg = X86_ESI;
3128                                 } else {
3129                                         dreg = X86_EBX;
3130                                         if (ins->sreg2 == X86_EBX)
3131                                                 dreg = X86_EDI;
3132                                 }
3133                         }
3134
3135                         if (dreg != ins->dreg) {
3136                                 x86_push_reg (code, dreg);
3137                         }
3138
3139                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3140                         x86_prefix (code, X86_LOCK_PREFIX);
3141                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3142                         /* dreg contains the old value, add with sreg2 value */
3143                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3144                         
3145                         if (ins->dreg != dreg) {
3146                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3147                                 x86_pop_reg (code, dreg);
3148                         }
3149
3150                         break;
3151                 }
3152                 case OP_ATOMIC_EXCHANGE_I4: {
3153                         guchar *br[2];
3154                         int sreg2 = ins->sreg2;
3155                         int breg = ins->inst_basereg;
3156
3157                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3158                          * hack to overcome limits in x86 reg allocator 
3159                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3160                          */
3161                         if (ins->dreg != X86_EAX)
3162                                 x86_push_reg (code, X86_EAX);
3163                         
3164                         /* We need the EAX reg for the cmpxchg */
3165                         if (ins->sreg2 == X86_EAX) {
3166                                 x86_push_reg (code, X86_EDX);
3167                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3168                                 sreg2 = X86_EDX;
3169                         }
3170
3171                         if (breg == X86_EAX) {
3172                                 x86_push_reg (code, X86_ESI);
3173                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3174                                 breg = X86_ESI;
3175                         }
3176
3177                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3178
3179                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3180                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3181                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3182                         x86_patch (br [1], br [0]);
3183
3184                         if (breg != ins->inst_basereg)
3185                                 x86_pop_reg (code, X86_ESI);
3186
3187                         if (ins->dreg != X86_EAX) {
3188                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3189                                 x86_pop_reg (code, X86_EAX);
3190                         }
3191
3192                         if (ins->sreg2 != sreg2)
3193                                 x86_pop_reg (code, X86_EDX);
3194
3195                         break;
3196                 }
3197                 default:
3198                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3199                         g_assert_not_reached ();
3200                 }
3201
3202                 if ((code - cfg->native_code - offset) > max_len) {
3203                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3204                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3205                         g_assert_not_reached ();
3206                 }
3207                
3208                 cpos += max_len;
3209
3210                 last_ins = ins;
3211                 last_offset = offset;
3212                 
3213                 ins = ins->next;
3214         }
3215
3216         cfg->code_len = code - cfg->native_code;
3217 }
3218
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Intentionally empty: the x86 backend has nothing to register here.
 */
void
mono_arch_register_lowlevel_calls (void)
{
        /* nothing to do */
        return;
}
3223
3224 void
3225 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3226 {
3227         MonoJumpInfo *patch_info;
3228         gboolean compile_aot = !run_cctors;
3229
3230         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3231                 unsigned char *ip = patch_info->ip.i + code;
3232                 const unsigned char *target;
3233
3234                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3235
3236                 if (compile_aot) {
3237                         switch (patch_info->type) {
3238                         case MONO_PATCH_INFO_BB:
3239                         case MONO_PATCH_INFO_LABEL:
3240                                 break;
3241                         default:
3242                                 /* No need to patch these */
3243                                 continue;
3244                         }
3245                 }
3246
3247                 switch (patch_info->type) {
3248                 case MONO_PATCH_INFO_IP:
3249                         *((gconstpointer *)(ip)) = target;
3250                         break;
3251                 case MONO_PATCH_INFO_CLASS_INIT: {
3252                         guint8 *code = ip;
3253                         /* Might already been changed to a nop */
3254                         x86_call_code (code, 0);
3255                         x86_patch (ip, target);
3256                         break;
3257                 }
3258                 case MONO_PATCH_INFO_ABS:
3259                 case MONO_PATCH_INFO_METHOD:
3260                 case MONO_PATCH_INFO_METHOD_JUMP:
3261                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3262                 case MONO_PATCH_INFO_BB:
3263                 case MONO_PATCH_INFO_LABEL:
3264                         x86_patch (ip, target);
3265                         break;
3266                 case MONO_PATCH_INFO_NONE:
3267                         break;
3268                 default: {
3269                         guint32 offset = mono_arch_get_patch_offset (ip);
3270                         *((gconstpointer *)(ip + offset)) = target;
3271                         break;
3272                 }
3273                 }
3274         }
3275 }
3276
3277 guint8 *
3278 mono_arch_emit_prolog (MonoCompile *cfg)
3279 {
3280         MonoMethod *method = cfg->method;
3281         MonoBasicBlock *bb;
3282         MonoMethodSignature *sig;
3283         MonoInst *inst;
3284         int alloc_size, pos, max_offset, i;
3285         guint8 *code;
3286
3287         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3288         code = cfg->native_code = g_malloc (cfg->code_size);
3289
3290         x86_push_reg (code, X86_EBP);
3291         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3292
3293         alloc_size = cfg->stack_offset;
3294         pos = 0;
3295
3296         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3297                 /* Might need to attach the thread to the JIT */
3298                 if (lmf_tls_offset != -1) {
3299                         guint8 *buf;
3300
3301                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3302                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3303                         buf = code;
3304                         x86_branch8 (code, X86_CC_NE, 0, 0);
3305                         x86_push_imm (code, cfg->domain);
3306                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3307                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3308                         x86_patch (buf, code);
3309 #ifdef PLATFORM_WIN32
3310                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3311                         /* FIXME: Add a separate key for LMF to avoid this */
3312                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3313 #endif
3314                 } else {
3315                         g_assert (!cfg->compile_aot);
3316                         x86_push_imm (code, cfg->domain);
3317                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3318                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3319                 }
3320         }
3321
3322         if (method->save_lmf) {
3323                 pos += sizeof (MonoLMF);
3324
3325                 /* save the current IP */
3326                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3327                 x86_push_imm_template (code);
3328
3329                 /* save all caller saved regs */
3330                 x86_push_reg (code, X86_EBP);
3331                 x86_push_reg (code, X86_ESI);
3332                 x86_push_reg (code, X86_EDI);
3333                 x86_push_reg (code, X86_EBX);
3334
3335                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3336                         /*
3337                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3338                          * through the mono_lmf_addr TLS variable.
3339                          */
3340                         /* %eax = previous_lmf */
3341                         x86_prefix (code, X86_GS_PREFIX);
3342                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3343                         /* skip method_info + lmf */
3344                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3345                         /* push previous_lmf */
3346                         x86_push_reg (code, X86_EAX);
3347                         /* new lmf = ESP */
3348                         x86_prefix (code, X86_GS_PREFIX);
3349                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
3350                 } else {
3351                         /* get the address of lmf for the current thread */
3352                         /* 
3353                          * This is performance critical so we try to use some tricks to make
3354                          * it fast.
3355                          */                                                                        
3356
3357                         if (lmf_addr_tls_offset != -1) {
3358                                 /* Load lmf quicky using the GS register */
3359                                 code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
3360 #ifdef PLATFORM_WIN32
3361                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3362                                 /* FIXME: Add a separate key for LMF to avoid this */
3363                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3364 #endif
3365                         } else {
3366                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3367                         }
3368
3369                         /* Skip method info */
3370                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3371
3372                         /* push lmf */
3373                         x86_push_reg (code, X86_EAX); 
3374                         /* push *lfm (previous_lmf) */
3375                         x86_push_membase (code, X86_EAX, 0);
3376                         /* *(lmf) = ESP */
3377                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3378                 }
3379         } else {
3380
3381                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3382                         x86_push_reg (code, X86_EBX);
3383                         pos += 4;
3384                 }
3385
3386                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3387                         x86_push_reg (code, X86_EDI);
3388                         pos += 4;
3389                 }
3390
3391                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3392                         x86_push_reg (code, X86_ESI);
3393                         pos += 4;
3394                 }
3395         }
3396
3397         alloc_size -= pos;
3398
3399 #if __APPLE__
3400         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3401         {
3402                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3403                 if (tot & 4) {
3404                         tot += 4;
3405                         alloc_size += 4;
3406                 }
3407                 if (tot & 8) {
3408                         alloc_size += 8;
3409                 }
3410         }
3411 #endif
3412
3413         if (alloc_size) {
3414                 /* See mono_emit_stack_alloc */
3415 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3416                 guint32 remaining_size = alloc_size;
3417                 while (remaining_size >= 0x1000) {
3418                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3419                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3420                         remaining_size -= 0x1000;
3421                 }
3422                 if (remaining_size)
3423                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3424 #else
3425                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3426 #endif
3427         }
3428
3429 #if __APPLE_
3430         /* check the stack is aligned */
3431         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3432         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3433         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3434         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3435         x86_breakpoint (code);
3436 #endif
3437
3438         /* compute max_offset in order to use short forward jumps */
3439         max_offset = 0;
3440         if (cfg->opt & MONO_OPT_BRANCH) {
3441                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3442                         MonoInst *ins = bb->code;
3443                         bb->max_offset = max_offset;
3444
3445                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3446                                 max_offset += 6;
3447                         /* max alignment for loops */
3448                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3449                                 max_offset += LOOP_ALIGNMENT;
3450
3451                         while (ins) {
3452                                 if (ins->opcode == OP_LABEL)
3453                                         ins->inst_c1 = max_offset;
3454                                 
3455                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3456                                 ins = ins->next;
3457                         }
3458                 }
3459         }
3460
3461         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3462                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3463
3464         /* load arguments allocated to register from the stack */
3465         sig = mono_method_signature (method);
3466         pos = 0;
3467
3468         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3469                 inst = cfg->args [pos];
3470                 if (inst->opcode == OP_REGVAR) {
3471                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3472                         if (cfg->verbose_level > 2)
3473                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3474                 }
3475                 pos++;
3476         }
3477
3478         cfg->code_len = code - cfg->native_code;
3479
3480         return code;
3481 }
3482
3483 void
3484 mono_arch_emit_epilog (MonoCompile *cfg)
3485 {
3486         MonoMethod *method = cfg->method;
3487         MonoMethodSignature *sig = mono_method_signature (method);
3488         int quad, pos;
3489         guint32 stack_to_pop;
3490         guint8 *code;
3491         int max_epilog_size = 16;
3492         CallInfo *cinfo;
3493         
3494         if (cfg->method->save_lmf)
3495                 max_epilog_size += 128;
3496         
3497         if (mono_jit_trace_calls != NULL)
3498                 max_epilog_size += 50;
3499
3500         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3501                 cfg->code_size *= 2;
3502                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3503                 mono_jit_stats.code_reallocs++;
3504         }
3505
3506         code = cfg->native_code + cfg->code_len;
3507
3508         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3509                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3510
3511         /* the code restoring the registers must be kept in sync with OP_JMP */
3512         pos = 0;
3513         
3514         if (method->save_lmf) {
3515                 gint32 prev_lmf_reg;
3516                 gint32 lmf_offset = -sizeof (MonoLMF);
3517
3518                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3519                         /*
3520                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3521                          * through the mono_lmf_addr TLS variable.
3522                          */
3523                         /* reg = previous_lmf */
3524                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3525
3526                         /* lmf = previous_lmf */
3527                         x86_prefix (code, X86_GS_PREFIX);
3528                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
3529                 } else {
3530                         /* Find a spare register */
3531                         switch (sig->ret->type) {
3532                         case MONO_TYPE_I8:
3533                         case MONO_TYPE_U8:
3534                                 prev_lmf_reg = X86_EDI;
3535                                 cfg->used_int_regs |= (1 << X86_EDI);
3536                                 break;
3537                         default:
3538                                 prev_lmf_reg = X86_EDX;
3539                                 break;
3540                         }
3541
3542                         /* reg = previous_lmf */
3543                         x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3544
3545                         /* ecx = lmf */
3546                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
3547
3548                         /* *(lmf) = previous_lmf */
3549                         x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3550                 }
3551
3552                 /* restore caller saved regs */
3553                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3554                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
3555                 }
3556
3557                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3558                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
3559                 }
3560                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3561                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
3562                 }
3563
3564                 /* EBP is restored by LEAVE */
3565         } else {
3566                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3567                         pos -= 4;
3568                 }
3569                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3570                         pos -= 4;
3571                 }
3572                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3573                         pos -= 4;
3574                 }
3575
3576                 if (pos)
3577                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3578
3579                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3580                         x86_pop_reg (code, X86_ESI);
3581                 }
3582                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3583                         x86_pop_reg (code, X86_EDI);
3584                 }
3585                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3586                         x86_pop_reg (code, X86_EBX);
3587                 }
3588         }
3589
3590         /* Load returned vtypes into registers if needed */
3591         cinfo = get_call_info (cfg->mempool, sig, FALSE);
3592         if (cinfo->ret.storage == ArgValuetypeInReg) {
3593                 for (quad = 0; quad < 2; quad ++) {
3594                         switch (cinfo->ret.pair_storage [quad]) {
3595                         case ArgInIReg:
3596                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
3597                                 break;
3598                         case ArgOnFloatFpStack:
3599                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
3600                                 break;
3601                         case ArgOnDoubleFpStack:
3602                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
3603                                 break;
3604                         case ArgNone:
3605                                 break;
3606                         default:
3607                                 g_assert_not_reached ();
3608                         }
3609                 }
3610         }
3611
3612         x86_leave (code);
3613
3614         if (CALLCONV_IS_STDCALL (sig)) {
3615                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3616
3617                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3618         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
3619                 stack_to_pop = 4;
3620         else
3621                 stack_to_pop = 0;
3622
3623         if (stack_to_pop)
3624                 x86_ret_imm (code, stack_to_pop);
3625         else
3626                 x86_ret (code);
3627
3628         cfg->code_len = code - cfg->native_code;
3629
3630         g_assert (cfg->code_len < cfg->code_size);
3631 }
3632
3633 void
3634 mono_arch_emit_exceptions (MonoCompile *cfg)
3635 {
3636         MonoJumpInfo *patch_info;
3637         int nthrows, i;
3638         guint8 *code;
3639         MonoClass *exc_classes [16];
3640         guint8 *exc_throw_start [16], *exc_throw_end [16];
3641         guint32 code_size;
3642         int exc_count = 0;
3643
3644         /* Compute needed space */
3645         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3646                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3647                         exc_count++;
3648         }
3649
3650         /* 
3651          * make sure we have enough space for exceptions
3652          * 16 is the size of two push_imm instructions and a call
3653          */
3654         if (cfg->compile_aot)
3655                 code_size = exc_count * 32;
3656         else
3657                 code_size = exc_count * 16;
3658
3659         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
3660                 cfg->code_size *= 2;
3661                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3662                 mono_jit_stats.code_reallocs++;
3663         }
3664
3665         code = cfg->native_code + cfg->code_len;
3666
3667         nthrows = 0;
3668         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3669                 switch (patch_info->type) {
3670                 case MONO_PATCH_INFO_EXC: {
3671                         MonoClass *exc_class;
3672                         guint8 *buf, *buf2;
3673                         guint32 throw_ip;
3674
3675                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3676
3677                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
3678                         g_assert (exc_class);
3679                         throw_ip = patch_info->ip.i;
3680
3681                         /* Find a throw sequence for the same exception class */
3682                         for (i = 0; i < nthrows; ++i)
3683                                 if (exc_classes [i] == exc_class)
3684                                         break;
3685                         if (i < nthrows) {
3686                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
3687                                 x86_jump_code (code, exc_throw_start [i]);
3688                                 patch_info->type = MONO_PATCH_INFO_NONE;
3689                         }
3690                         else {
3691                                 guint32 size;
3692
3693                                 /* Compute size of code following the push <OFFSET> */
3694                                 size = 5 + 5;
3695
3696                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
3697                                         /* Use the shorter form */
3698                                         buf = buf2 = code;
3699                                         x86_push_imm (code, 0);
3700                                 }
3701                                 else {
3702                                         buf = code;
3703                                         x86_push_imm (code, 0xf0f0f0f0);
3704                                         buf2 = code;
3705                                 }
3706
3707                                 if (nthrows < 16) {
3708                                         exc_classes [nthrows] = exc_class;
3709                                         exc_throw_start [nthrows] = code;
3710                                 }
3711
3712                                 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
3713                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
3714                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
3715                                 patch_info->ip.i = code - cfg->native_code;
3716                                 x86_call_code (code, 0);
3717                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
3718                                 while (buf < buf2)
3719                                         x86_nop (buf);
3720
3721                                 if (nthrows < 16) {
3722                                         exc_throw_end [nthrows] = code;
3723                                         nthrows ++;
3724                                 }
3725                         }
3726                         break;
3727                 }
3728                 default:
3729                         /* do nothing */
3730                         break;
3731                 }
3732         }
3733
3734         cfg->code_len = code - cfg->native_code;
3735
3736         g_assert (cfg->code_len < cfg->code_size);
3737 }
3738
3739 void
3740 mono_arch_flush_icache (guint8 *code, gint size)
3741 {
3742         /* not needed */
3743 }
3744
/*
 * mono_arch_flush_register_windows:
 *
 *   Register window flush hook. This backend has nothing to flush.
 */
void
mono_arch_flush_register_windows (void)
{
        /* intentionally empty */
}
3749
3750 /*
3751  * Support for fast access to the thread-local lmf structure using the GS
3752  * segment register on NPTL + kernel 2.6.x.
3753  */
3754
3755 static gboolean tls_offset_inited = FALSE;
3756
3757 void
3758 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
3759 {
3760         if (!tls_offset_inited) {
3761                 if (!getenv ("MONO_NO_TLS")) {
3762 #ifdef PLATFORM_WIN32
3763                         /* 
3764                          * We need to init this multiple times, since when we are first called, the key might not
3765                          * be initialized yet.
3766                          */
3767                         appdomain_tls_offset = mono_domain_get_tls_key ();
3768                         lmf_tls_offset = mono_get_jit_tls_key ();
3769                         thread_tls_offset = mono_thread_get_tls_key ();
3770
3771                         /* Only 64 tls entries can be accessed using inline code */
3772                         if (appdomain_tls_offset >= 64)
3773                                 appdomain_tls_offset = -1;
3774                         if (lmf_tls_offset >= 64)
3775                                 lmf_tls_offset = -1;
3776                         if (thread_tls_offset >= 64)
3777                                 thread_tls_offset = -1;
3778 #else
3779 #if MONO_XEN_OPT
3780                         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
3781 #endif
3782                         tls_offset_inited = TRUE;
3783                         appdomain_tls_offset = mono_domain_get_tls_offset ();
3784                         lmf_tls_offset = mono_get_lmf_tls_offset ();
3785                         lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
3786                         thread_tls_offset = mono_thread_get_tls_offset ();
3787 #endif
3788                 }
3789         }               
3790 }
3791
3792 void
3793 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
3794 {
3795 }
3796
3797 void
3798 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3799 {
3800         MonoCallInst *call = (MonoCallInst*)inst;
3801         CallInfo *cinfo = get_call_info (cfg->mempool, inst->signature, FALSE);
3802
3803         /* add the this argument */
3804         if (this_reg != -1) {
3805                 if (cinfo->args [0].storage == ArgInIReg) {
3806                         MonoInst *this;
3807                         MONO_INST_NEW (cfg, this, OP_MOVE);
3808                         this->type = this_type;
3809                         this->sreg1 = this_reg;
3810                         this->dreg = mono_regstate_next_int (cfg->rs);
3811                         mono_bblock_add_inst (cfg->cbb, this);
3812
3813                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
3814                 }
3815                 else {
3816                         MonoInst *this;
3817                         MONO_INST_NEW (cfg, this, OP_OUTARG);
3818                         this->type = this_type;
3819                         this->sreg1 = this_reg;
3820                         mono_bblock_add_inst (cfg->cbb, this);
3821                 }
3822         }
3823
3824         if (vt_reg != -1) {
3825                 MonoInst *vtarg;
3826
3827                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3828                         /*
3829                          * The valuetype is in EAX:EDX after the call, needs to be copied to
3830                          * the stack. Save the address here, so the call instruction can
3831                          * access it.
3832                          */
3833                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
3834                         vtarg->inst_destbasereg = X86_ESP;
3835                         vtarg->inst_offset = inst->stack_usage;
3836                         vtarg->sreg1 = vt_reg;
3837                         mono_bblock_add_inst (cfg->cbb, vtarg);
3838                 }
3839                 else if (cinfo->ret.storage == ArgInIReg) {
3840                         /* The return address is passed in a register */
3841                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
3842                         vtarg->sreg1 = vt_reg;
3843                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
3844                         mono_bblock_add_inst (cfg->cbb, vtarg);
3845
3846                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
3847                 } else {
3848                         MonoInst *vtarg;
3849                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3850                         vtarg->type = STACK_MP;
3851                         vtarg->sreg1 = vt_reg;
3852                         mono_bblock_add_inst (cfg->cbb, vtarg);
3853                 }
3854         }
3855 }
3856
3857 MonoInst*
3858 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3859 {
3860         MonoInst *ins = NULL;
3861
3862         if (cmethod->klass == mono_defaults.math_class) {
3863                 if (strcmp (cmethod->name, "Sin") == 0) {
3864                         MONO_INST_NEW (cfg, ins, OP_SIN);
3865                         ins->inst_i0 = args [0];
3866                 } else if (strcmp (cmethod->name, "Cos") == 0) {
3867                         MONO_INST_NEW (cfg, ins, OP_COS);
3868                         ins->inst_i0 = args [0];
3869                 } else if (strcmp (cmethod->name, "Tan") == 0) {
3870                         MONO_INST_NEW (cfg, ins, OP_TAN);
3871                         ins->inst_i0 = args [0];
3872                 } else if (strcmp (cmethod->name, "Atan") == 0) {
3873                         MONO_INST_NEW (cfg, ins, OP_ATAN);
3874                         ins->inst_i0 = args [0];
3875                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
3876                         MONO_INST_NEW (cfg, ins, OP_SQRT);
3877                         ins->inst_i0 = args [0];
3878                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
3879                         MONO_INST_NEW (cfg, ins, OP_ABS);
3880                         ins->inst_i0 = args [0];
3881                 }
3882 #if 0
3883                 /* OP_FREM is not IEEE compatible */
3884                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
3885                         MONO_INST_NEW (cfg, ins, OP_FREM);
3886                         ins->inst_i0 = args [0];
3887                         ins->inst_i1 = args [1];
3888                 }
3889 #endif
3890         } else if (cmethod->klass == mono_defaults.thread_class &&
3891                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
3892                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
3893         } else if(cmethod->klass->image == mono_defaults.corlib &&
3894                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
3895                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
3896
3897                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3898                         MonoInst *ins_iconst;
3899
3900                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3901                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3902                         ins_iconst->inst_c0 = 1;
3903
3904                         ins->inst_i0 = args [0];
3905                         ins->inst_i1 = ins_iconst;
3906                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3907                         MonoInst *ins_iconst;
3908
3909                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3910                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3911                         ins_iconst->inst_c0 = -1;
3912
3913                         ins->inst_i0 = args [0];
3914                         ins->inst_i1 = ins_iconst;
3915                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3916                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
3917
3918                         ins->inst_i0 = args [0];
3919                         ins->inst_i1 = args [1];
3920                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3921                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3922
3923                         ins->inst_i0 = args [0];
3924                         ins->inst_i1 = args [1];
3925                 }
3926         }
3927
3928         return ins;
3929 }
3930
3931
3932 gboolean
3933 mono_arch_print_tree (MonoInst *tree, int arity)
3934 {
3935         return 0;
3936 }
3937
3938 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3939 {
3940         MonoInst* ins;
3941         
3942         if (appdomain_tls_offset == -1)
3943                 return NULL;
3944
3945         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3946         ins->inst_offset = appdomain_tls_offset;
3947         return ins;
3948 }
3949
3950 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3951 {
3952         MonoInst* ins;
3953
3954         if (thread_tls_offset == -1)
3955                 return NULL;
3956
3957         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3958         ins->inst_offset = thread_tls_offset;
3959         return ins;
3960 }
3961
3962 guint32
3963 mono_arch_get_patch_offset (guint8 *code)
3964 {
3965         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3966                 return 2;
3967         else if ((code [0] == 0xba))
3968                 return 1;
3969         else if ((code [0] == 0x68))
3970                 /* push IMM */
3971                 return 1;
3972         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3973                 /* push <OFFSET>(<REG>) */
3974                 return 2;
3975         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
3976                 /* call *<OFFSET>(<REG>) */
3977                 return 2;
3978         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
3979                 /* fldl <ADDR> */
3980                 return 2;
3981         else if ((code [0] == 0x58) && (code [1] == 0x05))
3982                 /* pop %eax; add <OFFSET>, %eax */
3983                 return 2;
3984         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
3985                 /* pop <REG>; add <OFFSET>, <REG> */
3986                 return 3;
3987         else {
3988                 g_assert_not_reached ();
3989                 return -1;
3990         }
3991 }
3992
3993 gpointer*
3994 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
3995 {
3996         guint8 reg = 0;
3997         gint32 disp = 0;
3998
3999         /* go to the start of the call instruction
4000          *
4001          * address_byte = (m << 6) | (o << 3) | reg
4002          * call opcode: 0xff address_byte displacement
4003          * 0xff m=1,o=2 imm8
4004          * 0xff m=2,o=2 imm32
4005          */
4006         code -= 6;
4007
4008         /* 
4009          * A given byte sequence can match more than case here, so we have to be
4010          * really careful about the ordering of the cases. Longer sequences
4011          * come first.
4012          */
4013         if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
4014                 /*
4015                  * This is an interface call
4016                  * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
4017                  * ff 10                   call   *(%eax)
4018                  */
4019                 reg = x86_modrm_rm (code [5]);
4020                 disp = 0;
4021         } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
4022                 reg = code [4] & 0x07;
4023                 disp = (signed char)code [5];
4024         } else {
4025                 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
4026                         reg = code [1] & 0x07;
4027                         disp = *((gint32*)(code + 2));
4028                 } else if ((code [1] == 0xe8)) {
4029                         return NULL;
4030                 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
4031                         /*
4032                          * This is a interface call
4033                          * 8b 40 30   mov    0x30(%eax),%eax
4034                          * ff 10      call   *(%eax)
4035                          */
4036                         disp = 0;
4037                         reg = code [5] & 0x07;
4038                 }
4039                 else
4040                         return NULL;
4041         }
4042
4043         return (gpointer*)(((gint32)(regs [reg])) + disp);
4044 }
4045
4046 gpointer* 
4047 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
4048 {
4049         guint8 reg = 0;
4050         gint32 disp = 0;
4051
4052         code -= 7;
4053         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
4054                 reg = x86_modrm_rm (code [1]);
4055                 disp = code [4];
4056
4057                 if (reg == X86_EAX)
4058                         return NULL;
4059                 else
4060                         return (gpointer*)(((gint32)(regs [reg])) + disp);
4061         }
4062
4063         return NULL;
4064 }