2007-06-02 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/debug-helpers.h>
20 #include <mono/metadata/threads.h>
21 #include <mono/metadata/profiler-private.h>
22 #include <mono/utils/mono-math.h>
23
24 #include "trace.h"
25 #include "mini-x86.h"
26 #include "inssel.h"
27 #include "cpu-x86.h"
28
29 /* On windows, these hold the key returned by TlsAlloc () */
30 static gint lmf_tls_offset = -1;
31 static gint lmf_addr_tls_offset = -1;
32 static gint appdomain_tls_offset = -1;
33 static gint thread_tls_offset = -1;
34
35 #ifdef MONO_XEN_OPT
36 static gboolean optimize_for_xen = TRUE;
37 #else
38 #define optimize_for_xen 0
39 #endif
40
41 #ifdef PLATFORM_WIN32
42 static gboolean is_win32 = TRUE;
43 #else
44 static gboolean is_win32 = FALSE;
45 #endif
46
47 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
48
49 #define ARGS_OFFSET 8
50
51 #ifdef PLATFORM_WIN32
52 /* Under windows, the default pinvoke calling convention is stdcall */
53 #define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
54 #else
55 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
56 #endif
57
58 #define NOT_IMPLEMENTED g_assert_not_reached ()
59
60 const char*
61 mono_arch_regname (int reg) {
62         switch (reg) {
63         case X86_EAX: return "%eax";
64         case X86_EBX: return "%ebx";
65         case X86_ECX: return "%ecx";
66         case X86_EDX: return "%edx";
67         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
68         case X86_EDI: return "%edi";
69         case X86_ESI: return "%esi";
70         }
71         return "unknown";
72 }
73
/*
 * mono_arch_fregname:
 * @reg: a floating point register number (ignored)
 *
 * No floating point register names are provided by this backend, so the
 * result is always "unknown".
 */
const char*
mono_arch_fregname (int reg)
{
	static const char *const no_name = "unknown";

	return no_name;
}
78
/* Where an argument or a return value lives according to the calling convention */
typedef enum {
	ArgInIReg = 0,		/* in an integer register (ArgInfo.reg) */
	ArgInFloatSSEReg,	/* float in a float register, see add_float () */
	ArgInDoubleSSEReg,	/* double in a float register, see add_float () */
	ArgOnStack,		/* on the stack at ArgInfo.offset */
	ArgValuetypeInReg,	/* value type described by ArgInfo.pair_storage/pair_regs */
	ArgOnFloatFpStack,	/* R4 value on the fp stack */
	ArgOnDoubleFpStack,	/* R8 value on the fp stack */
	ArgNone			/* nothing (void return or unused pair slot) */
} ArgStorage;
89
90 typedef struct {
91         gint16 offset;
92         gint8  reg;
93         ArgStorage storage;
94
95         /* Only if storage == ArgValuetypeInReg */
96         ArgStorage pair_storage [2];
97         gint8 pair_regs [2];
98 } ArgInfo;
99
100 typedef struct {
101         int nargs;
102         guint32 stack_usage;
103         guint32 reg_usage;
104         guint32 freg_usage;
105         gboolean need_stack_align;
106         guint32 stack_align_amount;
107         ArgInfo ret;
108         ArgInfo sig_cookie;
109         ArgInfo args [1];
110 } CallInfo;
111
112 #define PARAM_REGS 0
113
114 #define FLOAT_PARAM_REGS 0
115
116 static X86_Reg_No param_regs [] = { 0 };
117
118 #if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
119 #define SMALL_STRUCTS_IN_REGS
120 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
121 #endif
122
123 static void inline
124 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
125 {
126     ainfo->offset = *stack_size;
127
128     if (*gr >= PARAM_REGS) {
129                 ainfo->storage = ArgOnStack;
130                 (*stack_size) += sizeof (gpointer);
131     }
132     else {
133                 ainfo->storage = ArgInIReg;
134                 ainfo->reg = param_regs [*gr];
135                 (*gr) ++;
136     }
137 }
138
139 static void inline
140 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
141 {
142         ainfo->offset = *stack_size;
143
144         g_assert (PARAM_REGS == 0);
145         
146         ainfo->storage = ArgOnStack;
147         (*stack_size) += sizeof (gpointer) * 2;
148 }
149
150 static void inline
151 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
152 {
153     ainfo->offset = *stack_size;
154
155     if (*gr >= FLOAT_PARAM_REGS) {
156                 ainfo->storage = ArgOnStack;
157                 (*stack_size) += is_double ? 8 : 4;
158     }
159     else {
160                 /* A double register */
161                 if (is_double)
162                         ainfo->storage = ArgInDoubleSSEReg;
163                 else
164                         ainfo->storage = ArgInFloatSSEReg;
165                 ainfo->reg = *gr;
166                 (*gr) += 1;
167     }
168 }
169
170
171 static void
172 add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
173                gboolean is_return,
174                guint32 *gr, guint32 *fr, guint32 *stack_size)
175 {
176         guint32 size;
177         MonoClass *klass;
178
179         klass = mono_class_from_mono_type (type);
180         if (sig->pinvoke) 
181                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
182         else 
183                 size = mono_type_stack_size (&klass->byval_arg, NULL);
184
185 #ifdef SMALL_STRUCTS_IN_REGS
186         if (sig->pinvoke && is_return) {
187                 MonoMarshalType *info;
188
189                 /*
190                  * the exact rules are not very well documented, the code below seems to work with the 
191                  * code generated by gcc 3.3.3 -mno-cygwin.
192                  */
193                 info = mono_marshal_load_type_info (klass);
194                 g_assert (info);
195
196                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
197
198                 /* Special case structs with only a float member */
199                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
200                         ainfo->storage = ArgValuetypeInReg;
201                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
202                         return;
203                 }
204                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
205                         ainfo->storage = ArgValuetypeInReg;
206                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
207                         return;
208                 }               
209                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
210                         ainfo->storage = ArgValuetypeInReg;
211                         ainfo->pair_storage [0] = ArgInIReg;
212                         ainfo->pair_regs [0] = return_regs [0];
213                         if (info->native_size > 4) {
214                                 ainfo->pair_storage [1] = ArgInIReg;
215                                 ainfo->pair_regs [1] = return_regs [1];
216                         }
217                         return;
218                 }
219         }
220 #endif
221
222         ainfo->offset = *stack_size;
223         ainfo->storage = ArgOnStack;
224         *stack_size += ALIGN_TO (size, sizeof (gpointer));
225 }
226
227 /*
228  * get_call_info:
229  *
230  *  Obtain information about a call according to the calling convention.
231  * For x86 ELF, see the "System V Application Binary Interface Intel386 
232  * Architecture Processor Supplment, Fourth Edition" document for more
233  * information.
234  * For x86 win32, see ???.
235  */
236 static CallInfo*
237 get_call_info (MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
238 {
239         guint32 i, gr, fr;
240         MonoType *ret_type;
241         int n = sig->hasthis + sig->param_count;
242         guint32 stack_size = 0;
243         CallInfo *cinfo;
244
245         if (mp)
246                 cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
247         else
248                 cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
249
250         gr = 0;
251         fr = 0;
252
253         /* return value */
254         {
255                 ret_type = mono_type_get_underlying_type (sig->ret);
256                 switch (ret_type->type) {
257                 case MONO_TYPE_BOOLEAN:
258                 case MONO_TYPE_I1:
259                 case MONO_TYPE_U1:
260                 case MONO_TYPE_I2:
261                 case MONO_TYPE_U2:
262                 case MONO_TYPE_CHAR:
263                 case MONO_TYPE_I4:
264                 case MONO_TYPE_U4:
265                 case MONO_TYPE_I:
266                 case MONO_TYPE_U:
267                 case MONO_TYPE_PTR:
268                 case MONO_TYPE_FNPTR:
269                 case MONO_TYPE_CLASS:
270                 case MONO_TYPE_OBJECT:
271                 case MONO_TYPE_SZARRAY:
272                 case MONO_TYPE_ARRAY:
273                 case MONO_TYPE_STRING:
274                         cinfo->ret.storage = ArgInIReg;
275                         cinfo->ret.reg = X86_EAX;
276                         break;
277                 case MONO_TYPE_U8:
278                 case MONO_TYPE_I8:
279                         cinfo->ret.storage = ArgInIReg;
280                         cinfo->ret.reg = X86_EAX;
281                         break;
282                 case MONO_TYPE_R4:
283                         cinfo->ret.storage = ArgOnFloatFpStack;
284                         break;
285                 case MONO_TYPE_R8:
286                         cinfo->ret.storage = ArgOnDoubleFpStack;
287                         break;
288                 case MONO_TYPE_GENERICINST:
289                         if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
290                                 cinfo->ret.storage = ArgInIReg;
291                                 cinfo->ret.reg = X86_EAX;
292                                 break;
293                         }
294                         /* Fall through */
295                 case MONO_TYPE_VALUETYPE: {
296                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
297
298                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
299                         if (cinfo->ret.storage == ArgOnStack)
300                                 /* The caller passes the address where the value is stored */
301                                 add_general (&gr, &stack_size, &cinfo->ret);
302                         break;
303                 }
304                 case MONO_TYPE_TYPEDBYREF:
305                         /* Same as a valuetype with size 24 */
306                         add_general (&gr, &stack_size, &cinfo->ret);
307                         ;
308                         break;
309                 case MONO_TYPE_VOID:
310                         cinfo->ret.storage = ArgNone;
311                         break;
312                 default:
313                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
314                 }
315         }
316
317         /* this */
318         if (sig->hasthis)
319                 add_general (&gr, &stack_size, cinfo->args + 0);
320
321         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
322                 gr = PARAM_REGS;
323                 fr = FLOAT_PARAM_REGS;
324                 
325                 /* Emit the signature cookie just before the implicit arguments */
326                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
327         }
328
329         for (i = 0; i < sig->param_count; ++i) {
330                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
331                 MonoType *ptype;
332
333                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
334                         /* We allways pass the sig cookie on the stack for simplicity */
335                         /* 
336                          * Prevent implicit arguments + the sig cookie from being passed 
337                          * in registers.
338                          */
339                         gr = PARAM_REGS;
340                         fr = FLOAT_PARAM_REGS;
341
342                         /* Emit the signature cookie just before the implicit arguments */
343                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
344                 }
345
346                 if (sig->params [i]->byref) {
347                         add_general (&gr, &stack_size, ainfo);
348                         continue;
349                 }
350                 ptype = mono_type_get_underlying_type (sig->params [i]);
351                 switch (ptype->type) {
352                 case MONO_TYPE_BOOLEAN:
353                 case MONO_TYPE_I1:
354                 case MONO_TYPE_U1:
355                         add_general (&gr, &stack_size, ainfo);
356                         break;
357                 case MONO_TYPE_I2:
358                 case MONO_TYPE_U2:
359                 case MONO_TYPE_CHAR:
360                         add_general (&gr, &stack_size, ainfo);
361                         break;
362                 case MONO_TYPE_I4:
363                 case MONO_TYPE_U4:
364                         add_general (&gr, &stack_size, ainfo);
365                         break;
366                 case MONO_TYPE_I:
367                 case MONO_TYPE_U:
368                 case MONO_TYPE_PTR:
369                 case MONO_TYPE_FNPTR:
370                 case MONO_TYPE_CLASS:
371                 case MONO_TYPE_OBJECT:
372                 case MONO_TYPE_STRING:
373                 case MONO_TYPE_SZARRAY:
374                 case MONO_TYPE_ARRAY:
375                         add_general (&gr, &stack_size, ainfo);
376                         break;
377                 case MONO_TYPE_GENERICINST:
378                         if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
379                                 add_general (&gr, &stack_size, ainfo);
380                                 break;
381                         }
382                         /* Fall through */
383                 case MONO_TYPE_VALUETYPE:
384                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
385                         break;
386                 case MONO_TYPE_TYPEDBYREF:
387                         stack_size += sizeof (MonoTypedRef);
388                         ainfo->storage = ArgOnStack;
389                         break;
390                 case MONO_TYPE_U8:
391                 case MONO_TYPE_I8:
392                         add_general_pair (&gr, &stack_size, ainfo);
393                         break;
394                 case MONO_TYPE_R4:
395                         add_float (&fr, &stack_size, ainfo, FALSE);
396                         break;
397                 case MONO_TYPE_R8:
398                         add_float (&fr, &stack_size, ainfo, TRUE);
399                         break;
400                 default:
401                         g_error ("unexpected type 0x%x", ptype->type);
402                         g_assert_not_reached ();
403                 }
404         }
405
406         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
407                 gr = PARAM_REGS;
408                 fr = FLOAT_PARAM_REGS;
409                 
410                 /* Emit the signature cookie just before the implicit arguments */
411                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
412         }
413
414 #if defined(__APPLE__)
415         if ((stack_size % 16) != 0) { 
416                 cinfo->need_stack_align = TRUE;
417                 stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
418         }
419 #endif
420
421         cinfo->stack_usage = stack_size;
422         cinfo->reg_usage = gr;
423         cinfo->freg_usage = fr;
424         return cinfo;
425 }
426
427 /*
428  * mono_arch_get_argument_info:
429  * @csig:  a method signature
430  * @param_count: the number of parameters to consider
431  * @arg_info: an array to store the result infos
432  *
433  * Gathers information on parameters such as size, alignment and
434  * padding. arg_info should be large enought to hold param_count + 1 entries. 
435  *
436  * Returns the size of the activation frame.
437  */
438 int
439 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
440 {
441         int k, frame_size = 0;
442         int size, pad;
443         guint32 align;
444         int offset = 8;
445         CallInfo *cinfo;
446
447         cinfo = get_call_info (NULL, csig, FALSE);
448
449         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
450                 frame_size += sizeof (gpointer);
451                 offset += 4;
452         }
453
454         arg_info [0].offset = offset;
455
456         if (csig->hasthis) {
457                 frame_size += sizeof (gpointer);
458                 offset += 4;
459         }
460
461         arg_info [0].size = frame_size;
462
463         for (k = 0; k < param_count; k++) {
464                 
465                 if (csig->pinvoke)
466                         size = mono_type_native_stack_size (csig->params [k], &align);
467                 else {
468                         int ialign;
469                         size = mono_type_stack_size (csig->params [k], &ialign);
470                         align = ialign;
471                 }
472
473                 /* ignore alignment for now */
474                 align = 1;
475
476                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
477                 arg_info [k].pad = pad;
478                 frame_size += size;
479                 arg_info [k + 1].pad = 0;
480                 arg_info [k + 1].size = size;
481                 offset += pad;
482                 arg_info [k + 1].offset = offset;
483                 offset += size;
484         }
485
486         align = MONO_ARCH_FRAME_ALIGNMENT;
487         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
488         arg_info [k].pad = pad;
489
490         g_free (cinfo);
491
492         return frame_size;
493 }
494
495 static const guchar cpuid_impl [] = {
496         0x55,                           /* push   %ebp */
497         0x89, 0xe5,                     /* mov    %esp,%ebp */
498         0x53,                           /* push   %ebx */
499         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
500         0x0f, 0xa2,                     /* cpuid   */
501         0x50,                           /* push   %eax */
502         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
503         0x89, 0x18,                     /* mov    %ebx,(%eax) */
504         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
505         0x89, 0x08,                     /* mov    %ecx,(%eax) */
506         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
507         0x89, 0x10,                     /* mov    %edx,(%eax) */
508         0x58,                           /* pop    %eax */
509         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
510         0x89, 0x02,                     /* mov    %eax,(%edx) */
511         0x5b,                           /* pop    %ebx */
512         0xc9,                           /* leave   */
513         0xc3,                           /* ret     */
514 };
515
516 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
517
518 static int 
519 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
520 {
521         int have_cpuid = 0;
522 #ifndef _MSC_VER
523         __asm__  __volatile__ (
524                 "pushfl\n"
525                 "popl %%eax\n"
526                 "movl %%eax, %%edx\n"
527                 "xorl $0x200000, %%eax\n"
528                 "pushl %%eax\n"
529                 "popfl\n"
530                 "pushfl\n"
531                 "popl %%eax\n"
532                 "xorl %%edx, %%eax\n"
533                 "andl $0x200000, %%eax\n"
534                 "movl %%eax, %0"
535                 : "=r" (have_cpuid)
536                 :
537                 : "%eax", "%edx"
538         );
539 #else
540         __asm {
541                 pushfd
542                 pop eax
543                 mov edx, eax
544                 xor eax, 0x200000
545                 push eax
546                 popfd
547                 pushfd
548                 pop eax
549                 xor eax, edx
550                 and eax, 0x200000
551                 mov have_cpuid, eax
552         }
553 #endif
554         if (have_cpuid) {
555                 /* Have to use the code manager to get around WinXP DEP */
556                 static CpuidFunc func = NULL;
557                 void *ptr;
558                 if (!func) {
559                         ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
560                         memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
561                         func = (CpuidFunc)ptr;
562                 }
563                 func (id, p_eax, p_ebx, p_ecx, p_edx);
564
565                 /*
566                  * We use this approach because of issues with gcc and pic code, see:
567                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
568                 __asm__ __volatile__ ("cpuid"
569                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
570                         : "a" (id));
571                 */
572                 return 1;
573         }
574         return 0;
575 }
576
577 /*
578  * Initialize the cpu to execute managed code.
579  */
580 void
581 mono_arch_cpu_init (void)
582 {
583         /* spec compliance requires running with double precision */
584 #ifndef _MSC_VER
585         guint16 fpcw;
586
587         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
588         fpcw &= ~X86_FPCW_PRECC_MASK;
589         fpcw |= X86_FPCW_PREC_DOUBLE;
590         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
591         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
592 #else
593         _control87 (_PC_53, MCW_PC);
594 #endif
595 }
596
597 /*
598  * This function returns the optimizations supported on this cpu.
599  */
600 guint32
601 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
602 {
603         int eax, ebx, ecx, edx;
604         guint32 opts = 0;
605         
606         *exclude_mask = 0;
607         /* Feature Flags function, flags returned in EDX. */
608         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
609                 if (edx & (1 << 15)) {
610                         opts |= MONO_OPT_CMOV;
611                         if (edx & 1)
612                                 opts |= MONO_OPT_FCMOV;
613                         else
614                                 *exclude_mask |= MONO_OPT_FCMOV;
615                 } else
616                         *exclude_mask |= MONO_OPT_CMOV;
617         }
618         return opts;
619 }
620
621 /*
622  * Determine whenever the trap whose info is in SIGINFO is caused by
623  * integer overflow.
624  */
625 gboolean
626 mono_arch_is_int_overflow (void *sigctx, void *info)
627 {
628         MonoContext ctx;
629         guint8* ip;
630
631         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
632
633         ip = (guint8*)ctx.eip;
634
635         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
636                 gint32 reg;
637
638                 /* idiv REG */
639                 switch (x86_modrm_rm (ip [1])) {
640                 case X86_EAX:
641                         reg = ctx.eax;
642                         break;
643                 case X86_ECX:
644                         reg = ctx.ecx;
645                         break;
646                 case X86_EDX:
647                         reg = ctx.edx;
648                         break;
649                 case X86_EBX:
650                         reg = ctx.ebx;
651                         break;
652                 case X86_ESI:
653                         reg = ctx.esi;
654                         break;
655                 case X86_EDI:
656                         reg = ctx.edi;
657                         break;
658                 default:
659                         g_assert_not_reached ();
660                         reg = -1;
661                 }
662
663                 if (reg == -1)
664                         return TRUE;
665         }
666                         
667         return FALSE;
668 }
669
670 GList *
671 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
672 {
673         GList *vars = NULL;
674         int i;
675
676         for (i = 0; i < cfg->num_varinfo; i++) {
677                 MonoInst *ins = cfg->varinfo [i];
678                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
679
680                 /* unused vars */
681                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
682                         continue;
683
684                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
685                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
686                         continue;
687
688                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
689                  * 8bit quantities in caller saved registers on x86 */
690                 if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
691                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
692                         g_assert (i == vmv->idx);
693                         vars = g_list_prepend (vars, vmv);
694                 }
695         }
696
697         vars = mono_varlist_sort (cfg, vars, 0);
698
699         return vars;
700 }
701
702 GList *
703 mono_arch_get_global_int_regs (MonoCompile *cfg)
704 {
705         GList *regs = NULL;
706
707         /* we can use 3 registers for global allocation */
708         regs = g_list_prepend (regs, (gpointer)X86_EBX);
709         regs = g_list_prepend (regs, (gpointer)X86_ESI);
710         regs = g_list_prepend (regs, (gpointer)X86_EDI);
711
712         return regs;
713 }
714
715 /*
716  * mono_arch_regalloc_cost:
717  *
718  *  Return the cost, in number of memory references, of the action of 
719  * allocating the variable VMV into a register during global register
720  * allocation.
721  */
722 guint32
723 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
724 {
725         MonoInst *ins = cfg->varinfo [vmv->idx];
726
727         if (cfg->method->save_lmf)
728                 /* The register is already saved */
729                 return (ins->opcode == OP_ARG) ? 1 : 0;
730         else
731                 /* push+pop+possible load if it is an argument */
732                 return (ins->opcode == OP_ARG) ? 3 : 2;
733 }
734  
735 /*
736  * Set var information according to the calling convention. X86 version.
737  * The locals var stuff should most likely be split in another method.
738  */
739 void
740 mono_arch_allocate_vars (MonoCompile *cfg)
741 {
742         MonoMethodSignature *sig;
743         MonoMethodHeader *header;
744         MonoInst *inst;
745         guint32 locals_stack_size, locals_stack_align;
746         int i, offset;
747         gint32 *offsets;
748         CallInfo *cinfo;
749
750         header = mono_method_get_header (cfg->method);
751         sig = mono_method_signature (cfg->method);
752
753         cinfo = get_call_info (cfg->mempool, sig, FALSE);
754
755         cfg->frame_reg = MONO_ARCH_BASEREG;
756         offset = 0;
757
758         /* Reserve space to save LMF and caller saved registers */
759
760         if (cfg->method->save_lmf) {
761                 offset += sizeof (MonoLMF);
762         } else {
763                 if (cfg->used_int_regs & (1 << X86_EBX)) {
764                         offset += 4;
765                 }
766
767                 if (cfg->used_int_regs & (1 << X86_EDI)) {
768                         offset += 4;
769                 }
770
771                 if (cfg->used_int_regs & (1 << X86_ESI)) {
772                         offset += 4;
773                 }
774         }
775
776         switch (cinfo->ret.storage) {
777         case ArgValuetypeInReg:
778                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
779                 offset += 8;
780                 cfg->ret->opcode = OP_REGOFFSET;
781                 cfg->ret->inst_basereg = X86_EBP;
782                 cfg->ret->inst_offset = - offset;
783                 break;
784         default:
785                 break;
786         }
787
788         /* Allocate locals */
789         offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
790         if (locals_stack_align) {
791                 offset += (locals_stack_align - 1);
792                 offset &= ~(locals_stack_align - 1);
793         }
794         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
795                 if (offsets [i] != -1) {
796                         MonoInst *inst = cfg->varinfo [i];
797                         inst->opcode = OP_REGOFFSET;
798                         inst->inst_basereg = X86_EBP;
799                         inst->inst_offset = - (offset + offsets [i]);
800                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
801                 }
802         }
803         offset += locals_stack_size;
804
805
806         /*
807          * Allocate arguments+return value
808          */
809
810         switch (cinfo->ret.storage) {
811         case ArgOnStack:
812                 cfg->ret->opcode = OP_REGOFFSET;
813                 cfg->ret->inst_basereg = X86_EBP;
814                 cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
815                 break;
816         case ArgValuetypeInReg:
817                 break;
818         case ArgInIReg:
819                 cfg->ret->opcode = OP_REGVAR;
820                 cfg->ret->inst_c0 = cinfo->ret.reg;
821                 break;
822         case ArgNone:
823         case ArgOnFloatFpStack:
824         case ArgOnDoubleFpStack:
825                 break;
826         default:
827                 g_assert_not_reached ();
828         }
829
830         if (sig->call_convention == MONO_CALL_VARARG) {
831                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
832                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
833         }
834
835         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
836                 ArgInfo *ainfo = &cinfo->args [i];
837                 inst = cfg->args [i];
838                 if (inst->opcode != OP_REGVAR) {
839                         inst->opcode = OP_REGOFFSET;
840                         inst->inst_basereg = X86_EBP;
841                 }
842                 inst->inst_offset = ainfo->offset + ARGS_OFFSET;
843         }
844
845         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
846         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
847
848         cfg->stack_offset = offset;
849 }
850
851 void
852 mono_arch_create_vars (MonoCompile *cfg)
853 {
854         MonoMethodSignature *sig;
855         CallInfo *cinfo;
856
857         sig = mono_method_signature (cfg->method);
858
859         cinfo = get_call_info (cfg->mempool, sig, FALSE);
860
861         if (cinfo->ret.storage == ArgValuetypeInReg)
862                 cfg->ret_var_is_local = TRUE;
863 }
864
/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently, the alignment in mono_arch_call_opcode is computed without using
 * arch_get_argument_info.
 */
868
869 static void
870 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
871 {
872         MonoInst *arg;
873         MonoMethodSignature *tmp_sig;
874         MonoInst *sig_arg;
875
876         /* FIXME: Add support for signature tokens to AOT */
877         cfg->disable_aot = TRUE;
878         MONO_INST_NEW (cfg, arg, OP_OUTARG);
879
880         /*
881          * mono_ArgIterator_Setup assumes the signature cookie is 
882          * passed first and all the arguments which were before it are
883          * passed on the stack after the signature. So compensate by 
884          * passing a different signature.
885          */
886         tmp_sig = mono_metadata_signature_dup (call->signature);
887         tmp_sig->param_count -= call->signature->sentinelpos;
888         tmp_sig->sentinelpos = 0;
889         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
890
891         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
892         sig_arg->inst_p0 = tmp_sig;
893
894         arg->inst_left = sig_arg;
895         arg->type = STACK_PTR;
896         /* prepend, so they get reversed */
897         arg->next = call->out_args;
898         call->out_args = arg;
899 }
900
901 /* 
902  * take the arguments and generate the arch-specific
903  * instructions to properly call the function in call.
904  * This includes pushing, moving arguments to the right register
905  * etc.
906  */
907 MonoCallInst*
908 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
909         MonoInst *arg, *in;
910         MonoMethodSignature *sig;
911         int i, n;
912         CallInfo *cinfo;
913         int sentinelpos = 0;
914
915         sig = call->signature;
916         n = sig->param_count + sig->hasthis;
917
918         cinfo = get_call_info (cfg->mempool, sig, FALSE);
919
920         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
921                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
922
923         for (i = 0; i < n; ++i) {
924                 ArgInfo *ainfo = cinfo->args + i;
925
926                 /* Emit the signature cookie just before the implicit arguments */
927                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
928                         emit_sig_cookie (cfg, call);
929                 }
930
931                 if (is_virtual && i == 0) {
932                         /* the argument will be attached to the call instrucion */
933                         in = call->args [i];
934                 } else {
935                         MonoType *t;
936
937                         if (i >= sig->hasthis)
938                                 t = sig->params [i - sig->hasthis];
939                         else
940                                 t = &mono_defaults.int_class->byval_arg;
941                         t = mono_type_get_underlying_type (t);
942
943                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
944                         in = call->args [i];
945                         arg->cil_code = in->cil_code;
946                         arg->inst_left = in;
947                         arg->type = in->type;
948                         /* prepend, so they get reversed */
949                         arg->next = call->out_args;
950                         call->out_args = arg;
951
952                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
953                                 guint32 size, align;
954
955                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
956                                         size = sizeof (MonoTypedRef);
957                                         align = sizeof (gpointer);
958                                 }
959                                 else
960                                         if (sig->pinvoke)
961                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
962                                         else {
963                                                 int ialign;
964                                                 size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
965                                                 align = ialign;
966                                         }
967                                 arg->opcode = OP_OUTARG_VT;
968                                 arg->klass = in->klass;
969                                 arg->backend.is_pinvoke = sig->pinvoke;
970                                 arg->inst_imm = size; 
971                         }
972                         else {
973                                 switch (ainfo->storage) {
974                                 case ArgOnStack:
975                                         arg->opcode = OP_OUTARG;
976                                         if (!t->byref) {
977                                                 if (t->type == MONO_TYPE_R4)
978                                                         arg->opcode = OP_OUTARG_R4;
979                                                 else
980                                                         if (t->type == MONO_TYPE_R8)
981                                                                 arg->opcode = OP_OUTARG_R8;
982                                         }
983                                         break;
984                                 default:
985                                         g_assert_not_reached ();
986                                 }
987                         }
988                 }
989         }
990
991         /* Handle the case where there are no implicit arguments */
992         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
993                 emit_sig_cookie (cfg, call);
994         }
995
996         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
997                 if (cinfo->ret.storage == ArgValuetypeInReg) {
998                         MonoInst *zero_inst;
999                         /*
1000                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1001                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1002                          * before calling the function. So we add a dummy instruction to represent pushing the 
1003                          * struct return address to the stack. The return address will be saved to this stack slot 
1004                          * by the code emitted in this_vret_args.
1005                          */
1006                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1007                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1008                         zero_inst->inst_p0 = 0;
1009                         arg->inst_left = zero_inst;
1010                         arg->type = STACK_PTR;
1011                         /* prepend, so they get reversed */
1012                         arg->next = call->out_args;
1013                         call->out_args = arg;
1014                 }
1015                 else
1016                         /* if the function returns a struct, the called method already does a ret $0x4 */
1017                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1018                                 cinfo->stack_usage -= 4;
1019         }
1020         
1021         call->stack_usage = cinfo->stack_usage;
1022
1023 #if defined(__APPLE__)
1024         if (cinfo->need_stack_align) {
1025                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1026                 arg->inst_c0 = cinfo->stack_align_amount;
1027                 arg->next = call->out_args;
1028                 call->out_args = arg;
1029         }
1030 #endif 
1031
1032         return call;
1033 }
1034
1035 /*
1036  * Allow tracing to work with this interface (with an optional argument)
1037  */
1038 void*
1039 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1040 {
1041         guchar *code = p;
1042
1043 #if __APPLE__
1044         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1045 #endif
1046
1047         /* if some args are passed in registers, we need to save them here */
1048         x86_push_reg (code, X86_EBP);
1049
1050         if (cfg->compile_aot) {
1051                 x86_push_imm (code, cfg->method);
1052                 x86_mov_reg_imm (code, X86_EAX, func);
1053                 x86_call_reg (code, X86_EAX);
1054         } else {
1055                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1056                 x86_push_imm (code, cfg->method);
1057                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1058                 x86_call_code (code, 0);
1059         }
1060 #if __APPLE__
1061         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
1062 #else
1063         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1064 #endif
1065
1066         return code;
1067 }
1068
/*
 * How mono_arch_instrument_epilog treats the return value of the method
 * around the call it emits.
 */
enum {
        SAVE_NONE    = 0,       /* nothing is saved */
        SAVE_STRUCT  = 1,       /* valuetype return: nothing is saved or restored */
        SAVE_EAX     = 2,       /* %eax is pushed before and popped after the call */
        SAVE_EAX_EDX = 3,       /* the %edx:%eax pair is pushed and popped */
        SAVE_FP      = 4        /* the x87 top of stack is spilled to the stack and reloaded */
};
1076
1077 void*
1078 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1079 {
1080         guchar *code = p;
1081         int arg_size = 0, save_mode = SAVE_NONE;
1082         MonoMethod *method = cfg->method;
1083         
1084         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1085         case MONO_TYPE_VOID:
1086                 /* special case string .ctor icall */
1087                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1088                         save_mode = SAVE_EAX;
1089                 else
1090                         save_mode = SAVE_NONE;
1091                 break;
1092         case MONO_TYPE_I8:
1093         case MONO_TYPE_U8:
1094                 save_mode = SAVE_EAX_EDX;
1095                 break;
1096         case MONO_TYPE_R4:
1097         case MONO_TYPE_R8:
1098                 save_mode = SAVE_FP;
1099                 break;
1100         case MONO_TYPE_GENERICINST:
1101                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1102                         save_mode = SAVE_EAX;
1103                         break;
1104                 }
1105                 /* Fall through */
1106         case MONO_TYPE_VALUETYPE:
1107                 save_mode = SAVE_STRUCT;
1108                 break;
1109         default:
1110                 save_mode = SAVE_EAX;
1111                 break;
1112         }
1113
1114         switch (save_mode) {
1115         case SAVE_EAX_EDX:
1116                 x86_push_reg (code, X86_EDX);
1117                 x86_push_reg (code, X86_EAX);
1118                 if (enable_arguments) {
1119                         x86_push_reg (code, X86_EDX);
1120                         x86_push_reg (code, X86_EAX);
1121                         arg_size = 8;
1122                 }
1123                 break;
1124         case SAVE_EAX:
1125                 x86_push_reg (code, X86_EAX);
1126                 if (enable_arguments) {
1127                         x86_push_reg (code, X86_EAX);
1128                         arg_size = 4;
1129                 }
1130                 break;
1131         case SAVE_FP:
1132                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1133                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1134                 if (enable_arguments) {
1135                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1136                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1137                         arg_size = 8;
1138                 }
1139                 break;
1140         case SAVE_STRUCT:
1141                 if (enable_arguments) {
1142                         x86_push_membase (code, X86_EBP, 8);
1143                         arg_size = 4;
1144                 }
1145                 break;
1146         case SAVE_NONE:
1147         default:
1148                 break;
1149         }
1150
1151         if (cfg->compile_aot) {
1152                 x86_push_imm (code, method);
1153                 x86_mov_reg_imm (code, X86_EAX, func);
1154                 x86_call_reg (code, X86_EAX);
1155         } else {
1156                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1157                 x86_push_imm (code, method);
1158                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1159                 x86_call_code (code, 0);
1160         }
1161         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1162
1163         switch (save_mode) {
1164         case SAVE_EAX_EDX:
1165                 x86_pop_reg (code, X86_EAX);
1166                 x86_pop_reg (code, X86_EDX);
1167                 break;
1168         case SAVE_EAX:
1169                 x86_pop_reg (code, X86_EAX);
1170                 break;
1171         case SAVE_FP:
1172                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1173                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1174                 break;
1175         case SAVE_NONE:
1176         default:
1177                 break;
1178         }
1179
1180         return code;
1181 }
1182
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch with condition COND for INS. The target is the
 * label in ins->inst_i0 when MONO_INST_BRLABEL is set in ins->flags, and the
 * basic block ins->inst_true_bb otherwise. When the target already has a
 * non-zero native offset, the branch goes straight to it. Otherwise a
 * MONO_PATCH_INFO_LABEL/MONO_PATCH_INFO_BB patch is recorded and an 8 bit
 * branch is emitted if MONO_OPT_BRANCH is enabled and the distance between
 * the target's inst_c1/max_offset and cpos fits into 8 bits, a 32 bit branch
 * if not.
 *
 * Uses the `cfg', `code' and `cpos' variables of the expansion site.
 * NOTE(review): this expands to a bare if/else statement, not to a
 * do { } while (0) block, so it is only safe as a full statement.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (((ins)->flags & MONO_INST_BRLABEL) != 0) { \
        if (!(ins)->inst_i0->inst_c0) { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, (ins)->inst_i0); \
                if (!(cfg->opt & MONO_OPT_BRANCH) || \
                    !x86_is_imm8 ((ins)->inst_i0->inst_c1 - cpos)) { \
                        x86_branch32 (code, cond, 0, sign); \
                } else { \
                        x86_branch8 (code, cond, 0, sign); \
                } \
        } else { \
                x86_branch (code, cond, cfg->native_code + (ins)->inst_i0->inst_c0, sign); \
        } \
} else { \
        if (!(ins)->inst_true_bb->native_offset) { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, (ins)->inst_true_bb); \
                if (!(cfg->opt & MONO_OPT_BRANCH) || \
                    !x86_is_imm8 ((ins)->inst_true_bb->max_offset - cpos)) { \
                        x86_branch32 (code, cond, 0, sign); \
                } else { \
                        x86_branch8 (code, cond, 0, sign); \
                } \
        } else { \
                x86_branch (code, cond, cfg->native_code + (ins)->inst_true_bb->native_offset, sign); \
        } \
}
1207
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit a conditional branch which raises the system exception EXC_NAME
 * when COND holds. If mono_branch_optimize_exception_target () supplies a
 * target instruction, branch to that target directly; otherwise record a
 * MONO_PATCH_INFO_EXC patch and emit a 32 bit branch.
 *
 * Uses the `cfg', `bb', `code' and `cpos' variables of the expansion site.
 * NOTE(review): the expansion ends in `while (0);', so an invocation
 * followed by `;' yields an extra empty statement; the semicolon is kept in
 * case callers depend on it.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,is_signed,exc_name)         \
        do {                                                        \
                MonoInst *target_ins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
                if (target_ins != NULL) {                           \
                        EMIT_COND_BRANCH (target_ins, cond, is_signed); \
                } else {                                            \
                        mono_add_patch_info (cfg, code - cfg->native_code, \
                                        MONO_PATCH_INFO_EXC, exc_name); \
                        x86_branch32 (code, cond, 0, is_signed);    \
                }                                                   \
        } while (0); 
1223
/*
 * EMIT_FPCOMPARE:
 *
 *   Emit an x86_fcompp followed by an x86_fnstsw at IP.
 * The expansion keeps its trailing `;' (see `while (0);').
 */
#define EMIT_FPCOMPARE(ip) do { \
        x86_fcompp (ip); \
        x86_fnstsw (ip); \
} while (0); 
1228
1229
1230 static guint8*
1231 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1232 {
1233         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1234         x86_call_code (code, 0);
1235
1236         return code;
1237 }
1238
/*
 * INST_IGNORES_CFLAGS:
 *
 *   Evaluates to 0 for the add-with-carry/subtract-with-borrow opcodes
 * (OP_ADC, OP_SBB and their I/_IMM variants) and to 1 for every other
 * opcode. OPCODE is evaluated more than once.
 */
#define INST_IGNORES_CFLAGS(opcode) \
        (((opcode) != OP_ADC) && ((opcode) != OP_IADC) && \
         ((opcode) != OP_ADC_IMM) && ((opcode) != OP_IADC_IMM) && \
         ((opcode) != OP_SBB) && ((opcode) != OP_ISBB) && \
         ((opcode) != OP_SBB_IMM) && ((opcode) != OP_ISBB_IMM))
1240
1241 /*
1242  * peephole_pass_1:
1243  *
1244  *   Perform peephole opts which should/can be performed before local regalloc
1245  */
1246 static void
1247 peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
1248 {
1249         MonoInst *ins, *last_ins = NULL;
1250         ins = bb->code;
1251
1252         while (ins) {
1253                 switch (ins->opcode) {
1254                 case OP_IADD_IMM:
1255                 case OP_ADD_IMM:
1256                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1257                                 /* 
1258                                  * X86_LEA is like ADD, but doesn't have the
1259                                  * sreg1==dreg restriction.
1260                                  */
1261                                 ins->opcode = OP_X86_LEA_MEMBASE;
1262                                 ins->inst_basereg = ins->sreg1;
1263                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1264                                 ins->opcode = OP_X86_INC_REG;
1265                         break;
1266                 case OP_SUB_IMM:
1267                 case OP_ISUB_IMM:
1268                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1269                                 ins->opcode = OP_X86_LEA_MEMBASE;
1270                                 ins->inst_basereg = ins->sreg1;
1271                                 ins->inst_imm = -ins->inst_imm;
1272                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1273                                 ins->opcode = OP_X86_DEC_REG;
1274                         break;
1275                 case OP_COMPARE_IMM:
1276                 case OP_ICOMPARE_IMM:
1277                         /* OP_COMPARE_IMM (reg, 0) 
1278                          * --> 
1279                          * OP_X86_TEST_NULL (reg) 
1280                          */
1281                         if (!ins->inst_imm)
1282                                 ins->opcode = OP_X86_TEST_NULL;
1283                         break;
1284                 case OP_X86_COMPARE_MEMBASE_IMM:
1285                         /* 
1286                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1287                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1288                          * -->
1289                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1290                          * OP_COMPARE_IMM reg, imm
1291                          *
1292                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1293                          */
1294                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1295                             ins->inst_basereg == last_ins->inst_destbasereg &&
1296                             ins->inst_offset == last_ins->inst_offset) {
1297                                         ins->opcode = OP_COMPARE_IMM;
1298                                         ins->sreg1 = last_ins->sreg1;
1299
1300                                         /* check if we can remove cmp reg,0 with test null */
1301                                         if (!ins->inst_imm)
1302                                                 ins->opcode = OP_X86_TEST_NULL;
1303                                 }
1304
1305                         break;
1306                 case OP_LOAD_MEMBASE:
1307                 case OP_LOADI4_MEMBASE:
1308                         /* 
1309                          * Note: if reg1 = reg2 the load op is removed
1310                          *
1311                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1312                          * OP_LOAD_MEMBASE offset(basereg), reg2
1313                          * -->
1314                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1315                          * OP_MOVE reg1, reg2
1316                          */
1317                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1318                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1319                             ins->inst_basereg == last_ins->inst_destbasereg &&
1320                             ins->inst_offset == last_ins->inst_offset) {
1321                                 if (ins->dreg == last_ins->sreg1) {
1322                                         last_ins->next = ins->next;                             
1323                                         ins = ins->next;                                
1324                                         continue;
1325                                 } else {
1326                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1327                                         ins->opcode = OP_MOVE;
1328                                         ins->sreg1 = last_ins->sreg1;
1329                                 }
1330
1331                         /* 
1332                          * Note: reg1 must be different from the basereg in the second load
1333                          * Note: if reg1 = reg2 is equal then second load is removed
1334                          *
1335                          * OP_LOAD_MEMBASE offset(basereg), reg1
1336                          * OP_LOAD_MEMBASE offset(basereg), reg2
1337                          * -->
1338                          * OP_LOAD_MEMBASE offset(basereg), reg1
1339                          * OP_MOVE reg1, reg2
1340                          */
1341                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1342                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1343                               ins->inst_basereg != last_ins->dreg &&
1344                               ins->inst_basereg == last_ins->inst_basereg &&
1345                               ins->inst_offset == last_ins->inst_offset) {
1346
1347                                 if (ins->dreg == last_ins->dreg) {
1348                                         last_ins->next = ins->next;                             
1349                                         ins = ins->next;                                
1350                                         continue;
1351                                 } else {
1352                                         ins->opcode = OP_MOVE;
1353                                         ins->sreg1 = last_ins->dreg;
1354                                 }
1355
1356                                 //g_assert_not_reached ();
1357
1358 #if 0
1359                         /* 
1360                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1361                          * OP_LOAD_MEMBASE offset(basereg), reg
1362                          * -->
1363                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1364                          * OP_ICONST reg, imm
1365                          */
1366                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1367                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1368                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1369                                    ins->inst_offset == last_ins->inst_offset) {
1370                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1371                                 ins->opcode = OP_ICONST;
1372                                 ins->inst_c0 = last_ins->inst_imm;
1373                                 g_assert_not_reached (); // check this rule
1374 #endif
1375                         }
1376                         break;
1377                 case OP_LOADU1_MEMBASE:
1378                 case OP_LOADI1_MEMBASE:
1379                         /* 
1380                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1381                          * OP_LOAD_MEMBASE offset(basereg), reg2
1382                          * -->
1383                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1384                          * CONV_I2/U2 reg1, reg2
1385                          */
1386                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1387                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1388                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1389                                         ins->inst_offset == last_ins->inst_offset) {
1390                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1391                                 ins->sreg1 = last_ins->sreg1;
1392                         }
1393                         break;
1394                 case OP_LOADU2_MEMBASE:
1395                 case OP_LOADI2_MEMBASE:
1396                         /* 
1397                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1398                          * OP_LOAD_MEMBASE offset(basereg), reg2
1399                          * -->
1400                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1401                          * CONV_I2/U2 reg1, reg2
1402                          */
1403                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1404                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1405                                         ins->inst_offset == last_ins->inst_offset) {
1406                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1407                                 ins->sreg1 = last_ins->sreg1;
1408                         }
1409                         break;
1410                 case CEE_CONV_I4:
1411                 case CEE_CONV_U4:
1412                 case OP_ICONV_TO_I4:
1413                 case OP_MOVE:
1414                         /*
1415                          * Removes:
1416                          *
1417                          * OP_MOVE reg, reg 
1418                          */
1419                         if (ins->dreg == ins->sreg1) {
1420                                 if (last_ins)
1421                                         last_ins->next = ins->next;                             
1422                                 ins = ins->next;
1423                                 continue;
1424                         }
1425                         /* 
1426                          * Removes:
1427                          *
1428                          * OP_MOVE sreg, dreg 
1429                          * OP_MOVE dreg, sreg
1430                          */
1431                         if (last_ins && last_ins->opcode == OP_MOVE &&
1432                             ins->sreg1 == last_ins->dreg &&
1433                             ins->dreg == last_ins->sreg1) {
1434                                 last_ins->next = ins->next;                             
1435                                 ins = ins->next;                                
1436                                 continue;
1437                         }
1438                         break;
1439                         
1440                 case OP_X86_PUSH_MEMBASE:
1441                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1442                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1443                             ins->inst_basereg == last_ins->inst_destbasereg &&
1444                             ins->inst_offset == last_ins->inst_offset) {
1445                                     ins->opcode = OP_X86_PUSH;
1446                                     ins->sreg1 = last_ins->sreg1;
1447                         }
1448                         break;
1449                 }
1450                 last_ins = ins;
1451                 ins = ins->next;
1452         }
1453         bb->last_ins = last_ins;
1454 }
1455
1456 static void
1457 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1458 {
1459         MonoInst *ins, *last_ins = NULL;
1460         ins = bb->code;
1461
1462         while (ins) {
1463
1464                 switch (ins->opcode) {
1465                 case OP_ICONST:
1466                         /* reg = 0 -> XOR (reg, reg) */
1467                         /* XOR sets cflags on x86, so we cant do it always */
1468                         if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
1469                                 MonoInst *ins2;
1470
1471                                 ins->opcode = OP_IXOR;
1472                                 ins->sreg1 = ins->dreg;
1473                                 ins->sreg2 = ins->dreg;
1474
1475                                 /* 
1476                                  * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
1477                                  * since it takes 3 bytes instead of 7.
1478                                  */
1479                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
1480                                         if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1481                                                 ins2->opcode = OP_STORE_MEMBASE_REG;
1482                                                 ins2->sreg1 = ins->dreg;
1483                                         }
1484                                         else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1485                                                 ins2->opcode = OP_STOREI4_MEMBASE_REG;
1486                                                 ins2->sreg1 = ins->dreg;
1487                                         }
1488                                         else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
1489                                                 /* Continue iteration */
1490                                         }
1491                                         else
1492                                                 break;
1493                                 }
1494                         }
1495                         break;
1496                 case OP_IADD_IMM:
1497                 case OP_ADD_IMM:
1498                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1499                                 ins->opcode = OP_X86_INC_REG;
1500                         break;
1501                 case OP_ISUB_IMM:
1502                 case OP_SUB_IMM:
1503                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1504                                 ins->opcode = OP_X86_DEC_REG;
1505                         break;
1506                 case OP_X86_COMPARE_MEMBASE_IMM:
1507                         /* 
1508                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1509                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1510                          * -->
1511                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1512                          * OP_COMPARE_IMM reg, imm
1513                          *
1514                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1515                          */
1516                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1517                             ins->inst_basereg == last_ins->inst_destbasereg &&
1518                             ins->inst_offset == last_ins->inst_offset) {
1519                                         ins->opcode = OP_COMPARE_IMM;
1520                                         ins->sreg1 = last_ins->sreg1;
1521
1522                                         /* check if we can remove cmp reg,0 with test null */
1523                                         if (!ins->inst_imm)
1524                                                 ins->opcode = OP_X86_TEST_NULL;
1525                                 }
1526
1527                         break;
1528                 case OP_LOAD_MEMBASE:
1529                 case OP_LOADI4_MEMBASE:
1530                         /* 
1531                          * Note: if reg1 = reg2 the load op is removed
1532                          *
1533                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1534                          * OP_LOAD_MEMBASE offset(basereg), reg2
1535                          * -->
1536                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1537                          * OP_MOVE reg1, reg2
1538                          */
1539                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1540                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1541                             ins->inst_basereg == last_ins->inst_destbasereg &&
1542                             ins->inst_offset == last_ins->inst_offset) {
1543                                 if (ins->dreg == last_ins->sreg1) {
1544                                         last_ins->next = ins->next;                             
1545                                         ins = ins->next;                                
1546                                         continue;
1547                                 } else {
1548                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1549                                         ins->opcode = OP_MOVE;
1550                                         ins->sreg1 = last_ins->sreg1;
1551                                 }
1552
1553                         /* 
1554                          * Note: reg1 must be different from the basereg in the second load
1555                          * Note: if reg1 = reg2 is equal then second load is removed
1556                          *
1557                          * OP_LOAD_MEMBASE offset(basereg), reg1
1558                          * OP_LOAD_MEMBASE offset(basereg), reg2
1559                          * -->
1560                          * OP_LOAD_MEMBASE offset(basereg), reg1
1561                          * OP_MOVE reg1, reg2
1562                          */
1563                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1564                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1565                               ins->inst_basereg != last_ins->dreg &&
1566                               ins->inst_basereg == last_ins->inst_basereg &&
1567                               ins->inst_offset == last_ins->inst_offset) {
1568
1569                                 if (ins->dreg == last_ins->dreg) {
1570                                         last_ins->next = ins->next;                             
1571                                         ins = ins->next;                                
1572                                         continue;
1573                                 } else {
1574                                         ins->opcode = OP_MOVE;
1575                                         ins->sreg1 = last_ins->dreg;
1576                                 }
1577
1578                                 //g_assert_not_reached ();
1579
1580 #if 0
1581                         /* 
1582                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1583                          * OP_LOAD_MEMBASE offset(basereg), reg
1584                          * -->
1585                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1586                          * OP_ICONST reg, imm
1587                          */
1588                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1589                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1590                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1591                                    ins->inst_offset == last_ins->inst_offset) {
1592                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1593                                 ins->opcode = OP_ICONST;
1594                                 ins->inst_c0 = last_ins->inst_imm;
1595                                 g_assert_not_reached (); // check this rule
1596 #endif
1597                         }
1598                         break;
1599                 case OP_LOADU1_MEMBASE:
1600                 case OP_LOADI1_MEMBASE:
1601                         /* 
1602                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1603                          * OP_LOAD_MEMBASE offset(basereg), reg2
1604                          * -->
1605                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1606                          * CONV_I2/U2 reg1, reg2
1607                          */
1608                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1609                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1610                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1611                                         ins->inst_offset == last_ins->inst_offset) {
1612                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1613                                 ins->sreg1 = last_ins->sreg1;
1614                         }
1615                         break;
1616                 case OP_LOADU2_MEMBASE:
1617                 case OP_LOADI2_MEMBASE:
1618                         /* 
1619                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1620                          * OP_LOAD_MEMBASE offset(basereg), reg2
1621                          * -->
1622                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1623                          * CONV_I2/U2 reg1, reg2
1624                          */
1625                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1626                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1627                                         ins->inst_offset == last_ins->inst_offset) {
1628                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1629                                 ins->sreg1 = last_ins->sreg1;
1630                         }
1631                         break;
1632                 case CEE_CONV_I4:
1633                 case CEE_CONV_U4:
1634                 case OP_ICONV_TO_I4:
1635                 case OP_MOVE:
1636                         /*
1637                          * Removes:
1638                          *
1639                          * OP_MOVE reg, reg 
1640                          */
1641                         if (ins->dreg == ins->sreg1) {
1642                                 if (last_ins)
1643                                         last_ins->next = ins->next;                             
1644                                 ins = ins->next;
1645                                 continue;
1646                         }
1647                         /* 
1648                          * Removes:
1649                          *
1650                          * OP_MOVE sreg, dreg 
1651                          * OP_MOVE dreg, sreg
1652                          */
1653                         if (last_ins && last_ins->opcode == OP_MOVE &&
1654                             ins->sreg1 == last_ins->dreg &&
1655                             ins->dreg == last_ins->sreg1) {
1656                                 last_ins->next = ins->next;                             
1657                                 ins = ins->next;                                
1658                                 continue;
1659                         }
1660                         break;
1661                 case OP_X86_PUSH_MEMBASE:
1662                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1663                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1664                             ins->inst_basereg == last_ins->inst_destbasereg &&
1665                             ins->inst_offset == last_ins->inst_offset) {
1666                                     ins->opcode = OP_X86_PUSH;
1667                                     ins->sreg1 = last_ins->sreg1;
1668                         }
1669                         break;
1670                 }
1671                 last_ins = ins;
1672                 ins = ins->next;
1673         }
1674         bb->last_ins = last_ins;
1675 }
1676
1677 static const int 
1678 branch_cc_table [] = {
1679         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1680         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1681         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1682 };
1683
1684 /* Maps CMP_... constants to X86_CC_... constants */
1685 static const int
1686 cc_table [] = {
1687         X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
1688         X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
1689 };
1690
1691 static const int
1692 cc_signed_table [] = {
1693         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1694         FALSE, FALSE, FALSE, FALSE
1695 };
1696
1697 void
1698 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1699 {
1700         if (cfg->opt & MONO_OPT_PEEPHOLE)
1701                 peephole_pass_1 (cfg, bb);
1702
1703         mono_local_regalloc (cfg, bb);
1704 }
1705
1706 static unsigned char*
1707 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1708 {
1709         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1710         x86_fnstcw_membase(code, X86_ESP, 0);
1711         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1712         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1713         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1714         x86_fldcw_membase (code, X86_ESP, 2);
1715         if (size == 8) {
1716                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1717                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1718                 x86_pop_reg (code, dreg);
1719                 /* FIXME: need the high register 
1720                  * x86_pop_reg (code, dreg_high);
1721                  */
1722         } else {
1723                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1724                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1725                 x86_pop_reg (code, dreg);
1726         }
1727         x86_fldcw_membase (code, X86_ESP, 0);
1728         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1729
1730         if (size == 1)
1731                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1732         else if (size == 2)
1733                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1734         return code;
1735 }
1736
1737 static unsigned char*
1738 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1739 {
1740         int sreg = tree->sreg1;
1741         int need_touch = FALSE;
1742
1743 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1744         need_touch = TRUE;
1745 #endif
1746
1747         if (need_touch) {
1748                 guint8* br[5];
1749
1750                 /*
1751                  * Under Windows:
1752                  * If requested stack size is larger than one page,
1753                  * perform stack-touch operation
1754                  */
1755                 /*
1756                  * Generate stack probe code.
1757                  * Under Windows, it is necessary to allocate one page at a time,
1758                  * "touching" stack after each successful sub-allocation. This is
1759                  * because of the way stack growth is implemented - there is a
1760                  * guard page before the lowest stack page that is currently commited.
1761                  * Stack normally grows sequentially so OS traps access to the
1762                  * guard page and commits more pages when needed.
1763                  */
1764                 x86_test_reg_imm (code, sreg, ~0xFFF);
1765                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1766
1767                 br[2] = code; /* loop */
1768                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1769                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1770
1771                 /* 
1772                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
1773                  * that follows only initializes the last part of the area.
1774                  */
1775                 /* Same as the init code below with size==0x1000 */
1776                 if (tree->flags & MONO_INST_INIT) {
1777                         x86_push_reg (code, X86_EAX);
1778                         x86_push_reg (code, X86_ECX);
1779                         x86_push_reg (code, X86_EDI);
1780                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
1781                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
1782                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
1783                         x86_cld (code);
1784                         x86_prefix (code, X86_REP_PREFIX);
1785                         x86_stosl (code);
1786                         x86_pop_reg (code, X86_EDI);
1787                         x86_pop_reg (code, X86_ECX);
1788                         x86_pop_reg (code, X86_EAX);
1789                 }
1790
1791                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1792                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1793                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1794                 x86_patch (br[3], br[2]);
1795                 x86_test_reg_reg (code, sreg, sreg);
1796                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1797                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1798
1799                 br[1] = code; x86_jump8 (code, 0);
1800
1801                 x86_patch (br[0], code);
1802                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1803                 x86_patch (br[1], code);
1804                 x86_patch (br[4], code);
1805         }
1806         else
1807                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1808
1809         if (tree->flags & MONO_INST_INIT) {
1810                 int offset = 0;
1811                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1812                         x86_push_reg (code, X86_EAX);
1813                         offset += 4;
1814                 }
1815                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1816                         x86_push_reg (code, X86_ECX);
1817                         offset += 4;
1818                 }
1819                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1820                         x86_push_reg (code, X86_EDI);
1821                         offset += 4;
1822                 }
1823                 
1824                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1825                 if (sreg != X86_ECX)
1826                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1827                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1828                                 
1829                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1830                 x86_cld (code);
1831                 x86_prefix (code, X86_REP_PREFIX);
1832                 x86_stosl (code);
1833                 
1834                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1835                         x86_pop_reg (code, X86_EDI);
1836                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1837                         x86_pop_reg (code, X86_ECX);
1838                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1839                         x86_pop_reg (code, X86_EAX);
1840         }
1841         return code;
1842 }
1843
1844
1845 static guint8*
1846 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1847 {
1848         CallInfo *cinfo;
1849         int quad;
1850
1851         /* Move return value to the target register */
1852         switch (ins->opcode) {
1853         case CEE_CALL:
1854         case OP_CALL_REG:
1855         case OP_CALL_MEMBASE:
1856                 if (ins->dreg != X86_EAX)
1857                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1858                 break;
1859         case OP_VCALL:
1860         case OP_VCALL_REG:
1861         case OP_VCALL_MEMBASE:
1862                 cinfo = get_call_info (cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
1863                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1864                         /* Pop the destination address from the stack */
1865                         x86_pop_reg (code, X86_ECX);
1866                         
1867                         for (quad = 0; quad < 2; quad ++) {
1868                                 switch (cinfo->ret.pair_storage [quad]) {
1869                                 case ArgInIReg:
1870                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1871                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1872                                         break;
1873                                 case ArgNone:
1874                                         break;
1875                                 default:
1876                                         g_assert_not_reached ();
1877                                 }
1878                         }
1879                 }
1880         default:
1881                 break;
1882         }
1883
1884         return code;
1885 }
1886
1887 /*
1888  * emit_tls_get:
1889  * @code: buffer to store code to
1890  * @dreg: hard register where to place the result
1891  * @tls_offset: offset info
1892  *
1893  * emit_tls_get emits in @code the native code that puts in the dreg register
1894  * the item in the thread local storage identified by tls_offset.
1895  *
1896  * Returns: a pointer to the end of the stored code
1897  */
1898 static guint8*
1899 emit_tls_get (guint8* code, int dreg, int tls_offset)
1900 {
1901 #ifdef PLATFORM_WIN32
1902         /* 
1903          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1904          * Journal and/or a disassembly of the TlsGet () function.
1905          */
1906         g_assert (tls_offset < 64);
1907         x86_prefix (code, X86_FS_PREFIX);
1908         x86_mov_reg_mem (code, dreg, 0x18, 4);
1909         /* Dunno what this does but TlsGetValue () contains it */
1910         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
1911         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
1912 #else
1913         if (optimize_for_xen) {
1914                 x86_prefix (code, X86_GS_PREFIX);
1915                 x86_mov_reg_mem (code, dreg, 0, 4);
1916                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
1917         } else {
1918                 x86_prefix (code, X86_GS_PREFIX);
1919                 x86_mov_reg_mem (code, dreg, tls_offset, 4);
1920         }
1921 #endif
1922         return code;
1923 }
1924
1925 /*
1926  * emit_load_volatile_arguments:
1927  *
1928  *  Load volatile arguments from the stack to the original input registers.
1929  * Required before a tail call.
1930  */
1931 static guint8*
1932 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
1933 {
1934         MonoMethod *method = cfg->method;
1935         MonoMethodSignature *sig;
1936         MonoInst *inst;
1937         CallInfo *cinfo;
1938         guint32 i;
1939
1940         /* FIXME: Generate intermediate code instead */
1941
1942         sig = mono_method_signature (method);
1943
1944         cinfo = get_call_info (cfg->mempool, sig, FALSE);
1945         
1946         /* This is the opposite of the code in emit_prolog */
1947
1948         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1949                 ArgInfo *ainfo = cinfo->args + i;
1950                 MonoType *arg_type;
1951                 inst = cfg->args [i];
1952
1953                 if (sig->hasthis && (i == 0))
1954                         arg_type = &mono_defaults.object_class->byval_arg;
1955                 else
1956                         arg_type = sig->params [i - sig->hasthis];
1957
1958                 /*
1959                  * On x86, the arguments are either in their original stack locations, or in
1960                  * global regs.
1961                  */
1962                 if (inst->opcode == OP_REGVAR) {
1963                         g_assert (ainfo->storage == ArgOnStack);
1964                         
1965                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
1966                 }
1967         }
1968
1969         return code;
1970 }
1971
/*
 * REAL_PRINT_REG:
 * @text: string literal prefix for the message (it is concatenated with
 *        the " %d %p\n" format suffix, so it cannot be an expression)
 * @reg: hard register to print
 *
 * Debugging aid. Emits through the local variable `code' a call to printf
 * that prints @text, the number of @reg and the value @reg holds at run
 * time. EAX, EDX and ECX are saved around the call and the three pushed
 * arguments are removed afterwards.
 *
 * Expands to a plain sequence of statements, so it must not be used as the
 * unbraced body of an if/else/loop. @reg is parenthesized wherever it is
 * evaluated so that compound expressions are not mis-parsed.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert ((reg) >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, (reg)); \
x86_push_imm (code, (reg)); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
1986
/*
 * Alignment, in bytes, applied to the native code of loop start blocks.
 * The value should be benchmarked and chosen per cpu.
 */
#define LOOP_ALIGNMENT 8
/* True when BB has loop_body_start set and a non-zero nesting level */
#define bb_is_loop_start(bb) (((bb)->loop_body_start != 0) && ((bb)->nesting != 0))
1990
1991 void
1992 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1993 {
1994         MonoInst *ins;
1995         MonoCallInst *call;
1996         guint offset;
1997         guint8 *code = cfg->native_code + cfg->code_len;
1998         MonoInst *last_ins = NULL;
1999         guint last_offset = 0;
2000         int max_len, cpos;
2001
2002         if (cfg->opt & MONO_OPT_PEEPHOLE)
2003                 peephole_pass (cfg, bb);
2004
2005         if (cfg->opt & MONO_OPT_LOOP) {
2006                 int pad, align = LOOP_ALIGNMENT;
2007                 /* set alignment depending on cpu */
2008                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2009                         pad = align - pad;
2010                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2011                         x86_padding (code, pad);
2012                         cfg->code_len += pad;
2013                         bb->native_offset = cfg->code_len;
2014                 }
2015         }
2016
2017         if (cfg->verbose_level > 2)
2018                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2019
2020         cpos = bb->max_offset;
2021
2022         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2023                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2024                 g_assert (!cfg->compile_aot);
2025                 cpos += 6;
2026
2027                 cov->data [bb->dfn].cil_code = bb->cil_code;
2028                 /* this is not thread save, but good enough */
2029                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2030         }
2031
2032         offset = code - cfg->native_code;
2033
2034         mono_debug_open_block (cfg, bb, offset);
2035
2036         ins = bb->code;
2037         while (ins) {
2038                 offset = code - cfg->native_code;
2039
2040                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2041
2042                 if (offset > (cfg->code_size - max_len - 16)) {
2043                         cfg->code_size *= 2;
2044                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2045                         code = cfg->native_code + offset;
2046                         mono_jit_stats.code_reallocs++;
2047                 }
2048
2049                 mono_debug_record_line_number (cfg, ins, offset);
2050
2051                 switch (ins->opcode) {
2052                 case OP_BIGMUL:
2053                         x86_mul_reg (code, ins->sreg2, TRUE);
2054                         break;
2055                 case OP_BIGMUL_UN:
2056                         x86_mul_reg (code, ins->sreg2, FALSE);
2057                         break;
2058                 case OP_X86_SETEQ_MEMBASE:
2059                 case OP_X86_SETNE_MEMBASE:
2060                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2061                                          ins->inst_basereg, ins->inst_offset, TRUE);
2062                         break;
2063                 case OP_STOREI1_MEMBASE_IMM:
2064                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2065                         break;
2066                 case OP_STOREI2_MEMBASE_IMM:
2067                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2068                         break;
2069                 case OP_STORE_MEMBASE_IMM:
2070                 case OP_STOREI4_MEMBASE_IMM:
2071                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2072                         break;
2073                 case OP_STOREI1_MEMBASE_REG:
2074                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2075                         break;
2076                 case OP_STOREI2_MEMBASE_REG:
2077                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2078                         break;
2079                 case OP_STORE_MEMBASE_REG:
2080                 case OP_STOREI4_MEMBASE_REG:
2081                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2082                         break;
2083                 case CEE_LDIND_I:
2084                 case CEE_LDIND_I4:
2085                 case CEE_LDIND_U4:
2086                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2087                         break;
2088                 case OP_LOADU4_MEM:
2089                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2090                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2091                         break;
2092                 case OP_LOAD_MEMBASE:
2093                 case OP_LOADI4_MEMBASE:
2094                 case OP_LOADU4_MEMBASE:
2095                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2096                         break;
2097                 case OP_LOADU1_MEMBASE:
2098                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2099                         break;
2100                 case OP_LOADI1_MEMBASE:
2101                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2102                         break;
2103                 case OP_LOADU2_MEMBASE:
2104                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2105                         break;
2106                 case OP_LOADI2_MEMBASE:
2107                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2108                         break;
2109                 case CEE_CONV_I1:
2110                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2111                         break;
2112                 case CEE_CONV_I2:
2113                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2114                         break;
2115                 case CEE_CONV_U1:
2116                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2117                         break;
2118                 case CEE_CONV_U2:
2119                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2120                         break;
2121                 case OP_COMPARE:
2122                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2123                         break;
2124                 case OP_COMPARE_IMM:
2125                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2126                         break;
2127                 case OP_X86_COMPARE_MEMBASE_REG:
2128                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2129                         break;
2130                 case OP_X86_COMPARE_MEMBASE_IMM:
2131                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2132                         break;
2133                 case OP_X86_COMPARE_MEMBASE8_IMM:
2134                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2135                         break;
2136                 case OP_X86_COMPARE_REG_MEMBASE:
2137                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2138                         break;
2139                 case OP_X86_COMPARE_MEM_IMM:
2140                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2141                         break;
2142                 case OP_X86_TEST_NULL:
2143                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2144                         break;
2145                 case OP_X86_ADD_MEMBASE_IMM:
2146                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2147                         break;
2148                 case OP_X86_ADD_MEMBASE:
2149                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2150                         break;
2151                 case OP_X86_SUB_MEMBASE_IMM:
2152                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2153                         break;
2154                 case OP_X86_SUB_MEMBASE:
2155                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2156                         break;
2157                 case OP_X86_AND_MEMBASE_IMM:
2158                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2159                         break;
2160                 case OP_X86_OR_MEMBASE_IMM:
2161                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2162                         break;
2163                 case OP_X86_XOR_MEMBASE_IMM:
2164                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2165                         break;
2166                 case OP_X86_INC_MEMBASE:
2167                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2168                         break;
2169                 case OP_X86_INC_REG:
2170                         x86_inc_reg (code, ins->dreg);
2171                         break;
2172                 case OP_X86_DEC_MEMBASE:
2173                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2174                         break;
2175                 case OP_X86_DEC_REG:
2176                         x86_dec_reg (code, ins->dreg);
2177                         break;
2178                 case OP_X86_MUL_MEMBASE:
2179                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2180                         break;
2181                 case OP_BREAK:
2182                         x86_breakpoint (code);
2183                         break;
2184                 case OP_ADDCC:
2185                 case CEE_ADD:
2186                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2187                         break;
2188                 case OP_ADC:
2189                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2190                         break;
2191                 case OP_ADDCC_IMM:
2192                 case OP_ADD_IMM:
2193                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2194                         break;
2195                 case OP_ADC_IMM:
2196                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2197                         break;
2198                 case OP_SUBCC:
2199                 case CEE_SUB:
2200                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2201                         break;
2202                 case OP_SBB:
2203                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2204                         break;
2205                 case OP_SUBCC_IMM:
2206                 case OP_SUB_IMM:
2207                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2208                         break;
2209                 case OP_SBB_IMM:
2210                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2211                         break;
2212                 case CEE_AND:
2213                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2214                         break;
2215                 case OP_AND_IMM:
2216                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2217                         break;
2218                 case CEE_DIV:
2219                         x86_cdq (code);
2220                         x86_div_reg (code, ins->sreg2, TRUE);
2221                         break;
2222                 case CEE_DIV_UN:
2223                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2224                         x86_div_reg (code, ins->sreg2, FALSE);
2225                         break;
2226                 case OP_DIV_IMM:
2227                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2228                         x86_cdq (code);
2229                         x86_div_reg (code, ins->sreg2, TRUE);
2230                         break;
2231                 case CEE_REM:
2232                         x86_cdq (code);
2233                         x86_div_reg (code, ins->sreg2, TRUE);
2234                         break;
2235                 case CEE_REM_UN:
2236                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2237                         x86_div_reg (code, ins->sreg2, FALSE);
2238                         break;
2239                 case OP_REM_IMM:
2240                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2241                         x86_cdq (code);
2242                         x86_div_reg (code, ins->sreg2, TRUE);
2243                         break;
2244                 case CEE_OR:
2245                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2246                         break;
2247                 case OP_OR_IMM:
2248                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2249                         break;
2250                 case CEE_XOR:
2251                 case OP_IXOR:
2252                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2253                         break;
2254                 case OP_XOR_IMM:
2255                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2256                         break;
2257                 case CEE_SHL:
2258                         g_assert (ins->sreg2 == X86_ECX);
2259                         x86_shift_reg (code, X86_SHL, ins->dreg);
2260                         break;
2261                 case CEE_SHR:
2262                         g_assert (ins->sreg2 == X86_ECX);
2263                         x86_shift_reg (code, X86_SAR, ins->dreg);
2264                         break;
2265                 case OP_SHR_IMM:
2266                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2267                         break;
2268                 case OP_SHR_UN_IMM:
2269                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2270                         break;
2271                 case CEE_SHR_UN:
2272                         g_assert (ins->sreg2 == X86_ECX);
2273                         x86_shift_reg (code, X86_SHR, ins->dreg);
2274                         break;
2275                 case OP_SHL_IMM:
2276                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2277                         break;
2278                 case OP_LSHL: {
2279                         guint8 *jump_to_end;
2280
2281                         /* handle shifts below 32 bits */
2282                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2283                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2284
2285                         x86_test_reg_imm (code, X86_ECX, 32);
2286                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2287
2288                         /* handle shifts over 31 bits */
2289                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2290                         x86_clear_reg (code, ins->sreg1);
2291                         
2292                         x86_patch (jump_to_end, code);
2293                         }
2294                         break;
2295                 case OP_LSHR: {
2296                         guint8 *jump_to_end;
2297
2298                         /* handle shifts below 32 bits */
2299                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2300                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2301
2302                         x86_test_reg_imm (code, X86_ECX, 32);
2303                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2304
2305                         /* handle shifts over 31 bits */
2306                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2307                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2308                         
2309                         x86_patch (jump_to_end, code);
2310                         }
2311                         break;
2312                 case OP_LSHR_UN: {
2313                         guint8 *jump_to_end;
2314
2315                         /* handle shifts below 32 bits */
2316                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2317                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2318
2319                         x86_test_reg_imm (code, X86_ECX, 32);
2320                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2321
2322                         /* handle shifts over 31 bits */
2323                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2324                         x86_clear_reg (code, ins->backend.reg3);
2325                         
2326                         x86_patch (jump_to_end, code);
2327                         }
2328                         break;
2329                 case OP_LSHL_IMM:
2330                         if (ins->inst_imm >= 32) {
2331                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2332                                 x86_clear_reg (code, ins->sreg1);
2333                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2334                         } else {
2335                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2336                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2337                         }
2338                         break;
2339                 case OP_LSHR_IMM:
2340                         if (ins->inst_imm >= 32) {
2341                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2342                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2343                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2344                         } else {
2345                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2346                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2347                         }
2348                         break;
2349                 case OP_LSHR_UN_IMM:
2350                         if (ins->inst_imm >= 32) {
2351                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2352                                 x86_clear_reg (code, ins->backend.reg3);
2353                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2354                         } else {
2355                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2356                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2357                         }
2358                         break;
2359                 case CEE_NOT:
2360                         x86_not_reg (code, ins->sreg1);
2361                         break;
2362                 case CEE_NEG:
2363                         x86_neg_reg (code, ins->sreg1);
2364                         break;
2365                 case OP_SEXT_I1:
2366                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2367                         break;
2368                 case OP_SEXT_I2:
2369                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2370                         break;
2371                 case CEE_MUL:
2372                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2373                         break;
2374                 case OP_MUL_IMM:
2375                         switch (ins->inst_imm) {
2376                         case 2:
2377                                 /* MOV r1, r2 */
2378                                 /* ADD r1, r1 */
2379                                 if (ins->dreg != ins->sreg1)
2380                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2381                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2382                                 break;
2383                         case 3:
2384                                 /* LEA r1, [r2 + r2*2] */
2385                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2386                                 break;
2387                         case 5:
2388                                 /* LEA r1, [r2 + r2*4] */
2389                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2390                                 break;
2391                         case 6:
2392                                 /* LEA r1, [r2 + r2*2] */
2393                                 /* ADD r1, r1          */
2394                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2395                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2396                                 break;
2397                         case 9:
2398                                 /* LEA r1, [r2 + r2*8] */
2399                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2400                                 break;
2401                         case 10:
2402                                 /* LEA r1, [r2 + r2*4] */
2403                                 /* ADD r1, r1          */
2404                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2405                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2406                                 break;
2407                         case 12:
2408                                 /* LEA r1, [r2 + r2*2] */
2409                                 /* SHL r1, 2           */
2410                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2411                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2412                                 break;
2413                         case 25:
2414                                 /* LEA r1, [r2 + r2*4] */
2415                                 /* LEA r1, [r1 + r1*4] */
2416                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2417                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2418                                 break;
2419                         case 100:
2420                                 /* LEA r1, [r2 + r2*4] */
2421                                 /* SHL r1, 2           */
2422                                 /* LEA r1, [r1 + r1*4] */
2423                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2424                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2425                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2426                                 break;
2427                         default:
2428                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2429                                 break;
2430                         }
2431                         break;
2432                 case CEE_MUL_OVF:
2433                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2434                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2435                         break;
2436                 case CEE_MUL_OVF_UN: {
2437                         /* the mul operation and the exception check should most likely be split */
2438                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2439                         /*g_assert (ins->sreg2 == X86_EAX);
2440                         g_assert (ins->dreg == X86_EAX);*/
2441                         if (ins->sreg2 == X86_EAX) {
2442                                 non_eax_reg = ins->sreg1;
2443                         } else if (ins->sreg1 == X86_EAX) {
2444                                 non_eax_reg = ins->sreg2;
2445                         } else {
2446                                 /* no need to save since we're going to store to it anyway */
2447                                 if (ins->dreg != X86_EAX) {
2448                                         saved_eax = TRUE;
2449                                         x86_push_reg (code, X86_EAX);
2450                                 }
2451                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2452                                 non_eax_reg = ins->sreg2;
2453                         }
2454                         if (ins->dreg == X86_EDX) {
2455                                 if (!saved_eax) {
2456                                         saved_eax = TRUE;
2457                                         x86_push_reg (code, X86_EAX);
2458                                 }
2459                         } else if (ins->dreg != X86_EAX) {
2460                                 saved_edx = TRUE;
2461                                 x86_push_reg (code, X86_EDX);
2462                         }
2463                         x86_mul_reg (code, non_eax_reg, FALSE);
2464                         /* save before the check since pop and mov don't change the flags */
2465                         if (ins->dreg != X86_EAX)
2466                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2467                         if (saved_edx)
2468                                 x86_pop_reg (code, X86_EDX);
2469                         if (saved_eax)
2470                                 x86_pop_reg (code, X86_EAX);
2471                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2472                         break;
2473                 }
2474                 case OP_ICONST:
2475                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2476                         break;
2477                 case OP_AOTCONST:
2478                         g_assert_not_reached ();
2479                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2480                         x86_mov_reg_imm (code, ins->dreg, 0);
2481                         break;
2482                 case OP_LOAD_GOTADDR:
2483                         x86_call_imm (code, 0);
2484                         /* 
2485                          * The patch needs to point to the pop, since the GOT offset needs 
2486                          * to be added to that address.
2487                          */
2488                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2489                         x86_pop_reg (code, ins->dreg);
2490                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2491                         break;
2492                 case OP_GOT_ENTRY:
2493                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2494                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2495                         break;
2496                 case OP_X86_PUSH_GOT_ENTRY:
2497                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2498                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2499                         break;
2500                 case CEE_CONV_I4:
2501                 case OP_MOVE:
2502                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2503                         break;
2504                 case CEE_CONV_U4:
2505                         g_assert_not_reached ();
2506                 case OP_JMP: {
2507                         /*
2508                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2509                          * Keep in sync with the code in emit_epilog.
2510                          */
2511                         int pos = 0;
2512
2513                         /* FIXME: no tracing support... */
2514                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2515                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2516                         /* reset offset to make max_len work */
2517                         offset = code - cfg->native_code;
2518
2519                         g_assert (!cfg->method->save_lmf);
2520
2521                         code = emit_load_volatile_arguments (cfg, code);
2522
2523                         if (cfg->used_int_regs & (1 << X86_EBX))
2524                                 pos -= 4;
2525                         if (cfg->used_int_regs & (1 << X86_EDI))
2526                                 pos -= 4;
2527                         if (cfg->used_int_regs & (1 << X86_ESI))
2528                                 pos -= 4;
2529                         if (pos)
2530                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2531         
2532                         if (cfg->used_int_regs & (1 << X86_ESI))
2533                                 x86_pop_reg (code, X86_ESI);
2534                         if (cfg->used_int_regs & (1 << X86_EDI))
2535                                 x86_pop_reg (code, X86_EDI);
2536                         if (cfg->used_int_regs & (1 << X86_EBX))
2537                                 x86_pop_reg (code, X86_EBX);
2538         
2539                         /* restore ESP/EBP */
2540                         x86_leave (code);
2541                         offset = code - cfg->native_code;
2542                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2543                         x86_jump32 (code, 0);
2544                         break;
2545                 }
2546                 case OP_CHECK_THIS:
2547                         /* ensure ins->sreg1 is not NULL
2548                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2549                          * cmp DWORD PTR [eax], 0
2550                          */
2551                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2552                         break;
2553                 case OP_ARGLIST: {
2554                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2555                         x86_push_reg (code, hreg);
2556                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2557                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2558                         x86_pop_reg (code, hreg);
2559                         break;
2560                 }
2561                 case OP_FCALL:
2562                 case OP_LCALL:
2563                 case OP_VCALL:
2564                 case OP_VOIDCALL:
2565                 case CEE_CALL:
2566                         call = (MonoCallInst*)ins;
2567                         if (ins->flags & MONO_INST_HAS_METHOD)
2568                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2569                         else
2570                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2571                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2572                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2573                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2574                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2575                                  * smart enough to do that optimization yet
2576                                  *
2577                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2578                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2579                                  * speedup (most likely from locality benefits). People with other processors should
2580                                  * check on theirs to see what happens.
2581                                  */
2582                                 if (call->stack_usage == 4) {
2583                                         /* we want to use registers that won't get used soon, so use
2584                                          * ecx, as eax will get allocated first. edx is used by long calls,
2585                                          * so we can't use that.
2586                                          */
2587                                         
2588                                         x86_pop_reg (code, X86_ECX);
2589                                 } else {
2590                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2591                                 }
2592                         }
2593                         code = emit_move_return_value (cfg, ins, code);
2594                         break;
2595                 case OP_FCALL_REG:
2596                 case OP_LCALL_REG:
2597                 case OP_VCALL_REG:
2598                 case OP_VOIDCALL_REG:
2599                 case OP_CALL_REG:
2600                         call = (MonoCallInst*)ins;
2601                         x86_call_reg (code, ins->sreg1);
2602                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2603                                 if (call->stack_usage == 4)
2604                                         x86_pop_reg (code, X86_ECX);
2605                                 else
2606                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2607                         }
2608                         code = emit_move_return_value (cfg, ins, code);
2609                         break;
2610                 case OP_FCALL_MEMBASE:
2611                 case OP_LCALL_MEMBASE:
2612                 case OP_VCALL_MEMBASE:
2613                 case OP_VOIDCALL_MEMBASE:
2614                 case OP_CALL_MEMBASE:
2615                         call = (MonoCallInst*)ins;
2616                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2617                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2618                                 if (call->stack_usage == 4)
2619                                         x86_pop_reg (code, X86_ECX);
2620                                 else
2621                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2622                         }
2623                         code = emit_move_return_value (cfg, ins, code);
2624                         break;
2625                 case OP_OUTARG:
2626                 case OP_X86_PUSH:
2627                         x86_push_reg (code, ins->sreg1);
2628                         break;
2629                 case OP_X86_PUSH_IMM:
2630                         x86_push_imm (code, ins->inst_imm);
2631                         break;
2632                 case OP_X86_PUSH_MEMBASE:
2633                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2634                         break;
2635                 case OP_X86_PUSH_OBJ: 
2636                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2637                         x86_push_reg (code, X86_EDI);
2638                         x86_push_reg (code, X86_ESI);
2639                         x86_push_reg (code, X86_ECX);
2640                         if (ins->inst_offset)
2641                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2642                         else
2643                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2644                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2645                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2646                         x86_cld (code);
2647                         x86_prefix (code, X86_REP_PREFIX);
2648                         x86_movsd (code);
2649                         x86_pop_reg (code, X86_ECX);
2650                         x86_pop_reg (code, X86_ESI);
2651                         x86_pop_reg (code, X86_EDI);
2652                         break;
2653                 case OP_X86_LEA:
2654                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2655                         break;
2656                 case OP_X86_LEA_MEMBASE:
2657                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2658                         break;
2659                 case OP_X86_XCHG:
2660                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2661                         break;
2662                 case OP_LOCALLOC:
2663                         /* keep alignment */
2664                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2665                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2666                         code = mono_emit_stack_alloc (code, ins);
2667                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2668                         break;
2669                 case CEE_RET:
2670                         x86_ret (code);
2671                         break;
2672                 case OP_THROW: {
2673                         x86_push_reg (code, ins->sreg1);
2674                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2675                                                           (gpointer)"mono_arch_throw_exception");
2676                         break;
2677                 }
2678                 case OP_RETHROW: {
2679                         x86_push_reg (code, ins->sreg1);
2680                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2681                                                           (gpointer)"mono_arch_rethrow_exception");
2682                         break;
2683                 }
2684                 case OP_CALL_HANDLER: 
2685                         /* Align stack */
2686 #ifdef __APPLE__
2687                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2688 #endif
2689                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2690                         x86_call_imm (code, 0);
2691 #ifdef __APPLE__
2692                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2693 #endif
2694                         break;
2695                 case OP_LABEL:
2696                         ins->inst_c0 = code - cfg->native_code;
2697                         break;
2698                 case OP_BR:
2699                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2700                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2701                         //break;
2702                         if (ins->flags & MONO_INST_BRLABEL) {
2703                                 if (ins->inst_i0->inst_c0) {
2704                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2705                                 } else {
2706                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2707                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2708                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2709                                                 x86_jump8 (code, 0);
2710                                         else 
2711                                                 x86_jump32 (code, 0);
2712                                 }
2713                         } else {
2714                                 if (ins->inst_target_bb->native_offset) {
2715                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2716                                 } else {
2717                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2718                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2719                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2720                                                 x86_jump8 (code, 0);
2721                                         else 
2722                                                 x86_jump32 (code, 0);
2723                                 } 
2724                         }
2725                         break;
2726                 case OP_BR_REG:
2727                         x86_jump_reg (code, ins->sreg1);
2728                         break;
2729                 case OP_CEQ:
2730                 case OP_CLT:
2731                 case OP_CLT_UN:
2732                 case OP_CGT:
2733                 case OP_CGT_UN:
2734                 case OP_CNE:
2735                         x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2736                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2737                         break;
2738                 case OP_COND_EXC_EQ:
2739                 case OP_COND_EXC_NE_UN:
2740                 case OP_COND_EXC_LT:
2741                 case OP_COND_EXC_LT_UN:
2742                 case OP_COND_EXC_GT:
2743                 case OP_COND_EXC_GT_UN:
2744                 case OP_COND_EXC_GE:
2745                 case OP_COND_EXC_GE_UN:
2746                 case OP_COND_EXC_LE:
2747                 case OP_COND_EXC_LE_UN:
2748                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
2749                         break;
2750                 case OP_COND_EXC_OV:
2751                 case OP_COND_EXC_NO:
2752                 case OP_COND_EXC_C:
2753                 case OP_COND_EXC_NC:
2754                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2755                         break;
2756                 case CEE_BEQ:
2757                 case CEE_BNE_UN:
2758                 case CEE_BLT:
2759                 case CEE_BLT_UN:
2760                 case CEE_BGT:
2761                 case CEE_BGT_UN:
2762                 case CEE_BGE:
2763                 case CEE_BGE_UN:
2764                 case CEE_BLE:
2765                 case CEE_BLE_UN:
2766                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2767                         break;
2768
2769                 /* floating point opcodes */
2770                 case OP_R8CONST: {
2771                         double d = *(double *)ins->inst_p0;
2772
2773                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2774                                 x86_fldz (code);
2775                         } else if (d == 1.0) {
2776                                 x86_fld1 (code);
2777                         } else {
2778                                 if (cfg->compile_aot) {
2779                                         guint32 *val = (guint32*)&d;
2780                                         x86_push_imm (code, val [1]);
2781                                         x86_push_imm (code, val [0]);
2782                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2783                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2784                                 }
2785                                 else {
2786                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2787                                         x86_fld (code, NULL, TRUE);
2788                                 }
2789                         }
2790                         break;
2791                 }
2792                 case OP_R4CONST: {
2793                         float f = *(float *)ins->inst_p0;
2794
2795                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2796                                 x86_fldz (code);
2797                         } else if (f == 1.0) {
2798                                 x86_fld1 (code);
2799                         } else {
2800                                 if (cfg->compile_aot) {
2801                                         guint32 val = *(guint32*)&f;
2802                                         x86_push_imm (code, val);
2803                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2804                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2805                                 }
2806                                 else {
2807                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2808                                         x86_fld (code, NULL, FALSE);
2809                                 }
2810                         }
2811                         break;
2812                 }
2813                 case OP_STORER8_MEMBASE_REG:
2814                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2815                         break;
2816                 case OP_LOADR8_SPILL_MEMBASE:
2817                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2818                         x86_fxch (code, 1);
2819                         break;
2820                 case OP_LOADR8_MEMBASE:
2821                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2822                         break;
2823                 case OP_STORER4_MEMBASE_REG:
2824                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2825                         break;
2826                 case OP_LOADR4_MEMBASE:
2827                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2828                         break;
2829                 case CEE_CONV_R4: /* FIXME: change precision */
2830                 case CEE_CONV_R8:
2831                         x86_push_reg (code, ins->sreg1);
2832                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2833                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2834                         break;
2835                 case OP_X86_FP_LOAD_I8:
2836                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2837                         break;
2838                 case OP_X86_FP_LOAD_I4:
2839                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2840                         break;
2841                 case OP_FCONV_TO_I1:
2842                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2843                         break;
2844                 case OP_FCONV_TO_U1:
2845                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2846                         break;
2847                 case OP_FCONV_TO_I2:
2848                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2849                         break;
2850                 case OP_FCONV_TO_U2:
2851                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2852                         break;
2853                 case OP_FCONV_TO_I4:
2854                 case OP_FCONV_TO_I:
2855                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2856                         break;
2857                 case OP_FCONV_TO_I8:
2858                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2859                         x86_fnstcw_membase(code, X86_ESP, 0);
2860                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2861                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2862                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2863                         x86_fldcw_membase (code, X86_ESP, 2);
2864                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2865                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2866                         x86_pop_reg (code, ins->dreg);
2867                         x86_pop_reg (code, ins->backend.reg3);
2868                         x86_fldcw_membase (code, X86_ESP, 0);
2869                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2870                         break;
2871                 case OP_LCONV_TO_R_UN: { 
2872                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2873                         guint8 *br;
2874
2875                         /* load 64bit integer to FP stack */
2876                         x86_push_imm (code, 0);
2877                         x86_push_reg (code, ins->sreg2);
2878                         x86_push_reg (code, ins->sreg1);
2879                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2880                         /* store as 80bit FP value */
2881                         x86_fst80_membase (code, X86_ESP, 0);
2882                         
2883                         /* test if lreg is negative */
2884                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2885                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2886         
2887                         /* add correction constant mn */
2888                         x86_fld80_mem (code, mn);
2889                         x86_fld80_membase (code, X86_ESP, 0);
2890                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2891                         x86_fst80_membase (code, X86_ESP, 0);
2892
2893                         x86_patch (br, code);
2894
2895                         x86_fld80_membase (code, X86_ESP, 0);
2896                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2897
2898                         break;
2899                 }
2900                 case OP_LCONV_TO_OVF_I: {
2901                         guint8 *br [3], *label [1];
2902                         MonoInst *tins;
2903
2904                         /* 
2905                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2906                          */
2907                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2908
2909                         /* If the low word top bit is set, see if we are negative */
2910                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2911                         /* We are not negative (no top bit set), check for our top word to be zero */
2912                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2913                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2914                         label [0] = code;
2915
2916                         /* throw exception */
2917                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2918                         if (tins) {
2919                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2920                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2921                                         x86_jump8 (code, 0);
2922                                 else
2923                                         x86_jump32 (code, 0);
2924                         } else {
2925                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2926                                 x86_jump32 (code, 0);
2927                         }
2928         
2929         
2930                         x86_patch (br [0], code);
2931                         /* our top bit is set, check that top word is 0xffffffff */
2932                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2933                 
2934                         x86_patch (br [1], code);
2935                         /* nope, emit exception */
2936                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2937                         x86_patch (br [2], label [0]);
2938
2939                         if (ins->dreg != ins->sreg1)
2940                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2941                         break;
2942                 }
2943                 case OP_FADD:
2944                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2945                         break;
2946                 case OP_FSUB:
2947                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2948                         break;          
2949                 case OP_FMUL:
2950                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2951                         break;          
2952                 case OP_FDIV:
2953                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2954                         break;          
2955                 case OP_FNEG:
2956                         x86_fchs (code);
2957                         break;          
2958                 case OP_SIN:
2959                         x86_fsin (code);
2960                         x86_fldz (code);
2961                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2962                         break;          
2963                 case OP_COS:
2964                         x86_fcos (code);
2965                         x86_fldz (code);
2966                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2967                         break;          
2968                 case OP_ABS:
2969                         x86_fabs (code);
2970                         break;          
2971                 case OP_TAN: {
2972                         /* 
2973                          * it really doesn't make sense to inline all this code,
2974                          * it's here just to show that things may not be as simple 
2975                          * as they appear.
2976                          */
2977                         guchar *check_pos, *end_tan, *pop_jump;
2978                         x86_push_reg (code, X86_EAX);
2979                         x86_fptan (code);
2980                         x86_fnstsw (code);
2981                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2982                         check_pos = code;
2983                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2984                         x86_fstp (code, 0); /* pop the 1.0 */
2985                         end_tan = code;
2986                         x86_jump8 (code, 0);
2987                         x86_fldpi (code);
2988                         x86_fp_op (code, X86_FADD, 0);
2989                         x86_fxch (code, 1);
2990                         x86_fprem1 (code);
2991                         x86_fstsw (code);
2992                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2993                         pop_jump = code;
2994                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2995                         x86_fstp (code, 1);
2996                         x86_fptan (code);
2997                         x86_patch (pop_jump, code);
2998                         x86_fstp (code, 0); /* pop the 1.0 */
2999                         x86_patch (check_pos, code);
3000                         x86_patch (end_tan, code);
3001                         x86_fldz (code);
3002                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3003                         x86_pop_reg (code, X86_EAX);
3004                         break;
3005                 }
3006                 case OP_ATAN:
3007                         x86_fld1 (code);
3008                         x86_fpatan (code);
3009                         x86_fldz (code);
3010                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3011                         break;          
3012                 case OP_SQRT:
3013                         x86_fsqrt (code);
3014                         break;          
3015                 case OP_X86_FPOP:
3016                         x86_fstp (code, 0);
3017                         break;          
3018                 case OP_FREM: {
3019                         guint8 *l1, *l2;
3020
3021                         x86_push_reg (code, X86_EAX);
3022                         /* we need to exchange ST(0) with ST(1) */
3023                         x86_fxch (code, 1);
3024
3025                         /* this requires a loop, because fprem sometimes 
3026                          * returns a partial remainder */
3027                         l1 = code;
3028                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3029                         /* x86_fprem1 (code); */
3030                         x86_fprem (code);
3031                         x86_fnstsw (code);
3032                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3033                         l2 = code + 2;
3034                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3035
3036                         /* pop result */
3037                         x86_fstp (code, 1);
3038
3039                         x86_pop_reg (code, X86_EAX);
3040                         break;
3041                 }
3042                 case OP_FCOMPARE:
3043                         if (cfg->opt & MONO_OPT_FCMOV) {
3044                                 x86_fcomip (code, 1);
3045                                 x86_fstp (code, 0);
3046                                 break;
3047                         }
3048                         /* this overwrites EAX */
3049                         EMIT_FPCOMPARE(code);
3050                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3051                         break;
3052                 case OP_FCEQ:
3053                         if (cfg->opt & MONO_OPT_FCMOV) {
3054                                 /* zeroing the register at the start results in 
3055                                  * shorter and faster code (we can also remove the widening op)
3056                                  */
3057                                 guchar *unordered_check;
3058                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3059                                 x86_fcomip (code, 1);
3060                                 x86_fstp (code, 0);
3061                                 unordered_check = code;
3062                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3063                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3064                                 x86_patch (unordered_check, code);
3065                                 break;
3066                         }
3067                         if (ins->dreg != X86_EAX) 
3068                                 x86_push_reg (code, X86_EAX);
3069
3070                         EMIT_FPCOMPARE(code);
3071                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3072                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3073                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3074                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3075
3076                         if (ins->dreg != X86_EAX) 
3077                                 x86_pop_reg (code, X86_EAX);
3078                         break;
3079                 case OP_FCLT:
3080                 case OP_FCLT_UN:
3081                         if (cfg->opt & MONO_OPT_FCMOV) {
3082                                 /* zeroing the register at the start results in 
3083                                  * shorter and faster code (we can also remove the widening op)
3084                                  */
3085                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3086                                 x86_fcomip (code, 1);
3087                                 x86_fstp (code, 0);
3088                                 if (ins->opcode == OP_FCLT_UN) {
3089                                         guchar *unordered_check = code;
3090                                         guchar *jump_to_end;
3091                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3092                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3093                                         jump_to_end = code;
3094                                         x86_jump8 (code, 0);
3095                                         x86_patch (unordered_check, code);
3096                                         x86_inc_reg (code, ins->dreg);
3097                                         x86_patch (jump_to_end, code);
3098                                 } else {
3099                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3100                                 }
3101                                 break;
3102                         }
3103                         if (ins->dreg != X86_EAX) 
3104                                 x86_push_reg (code, X86_EAX);
3105
3106                         EMIT_FPCOMPARE(code);
3107                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3108                         if (ins->opcode == OP_FCLT_UN) {
3109                                 guchar *is_not_zero_check, *end_jump;
3110                                 is_not_zero_check = code;
3111                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3112                                 end_jump = code;
3113                                 x86_jump8 (code, 0);
3114                                 x86_patch (is_not_zero_check, code);
3115                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3116
3117                                 x86_patch (end_jump, code);
3118                         }
3119                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3120                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3121
3122                         if (ins->dreg != X86_EAX) 
3123                                 x86_pop_reg (code, X86_EAX);
3124                         break;
3125                 case OP_FCGT:
3126                 case OP_FCGT_UN:
3127                         if (cfg->opt & MONO_OPT_FCMOV) {
3128                                 /* zeroing the register at the start results in 
3129                                  * shorter and faster code (we can also remove the widening op)
3130                                  */
3131                                 guchar *unordered_check;
3132                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3133                                 x86_fcomip (code, 1);
3134                                 x86_fstp (code, 0);
3135                                 if (ins->opcode == OP_FCGT) {
3136                                         unordered_check = code;
3137                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3138                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3139                                         x86_patch (unordered_check, code);
3140                                 } else {
3141                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3142                                 }
3143                                 break;
3144                         }
3145                         if (ins->dreg != X86_EAX) 
3146                                 x86_push_reg (code, X86_EAX);
3147
3148                         EMIT_FPCOMPARE(code);
3149                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3150                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3151                         if (ins->opcode == OP_FCGT_UN) {
3152                                 guchar *is_not_zero_check, *end_jump;
3153                                 is_not_zero_check = code;
3154                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3155                                 end_jump = code;
3156                                 x86_jump8 (code, 0);
3157                                 x86_patch (is_not_zero_check, code);
3158                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3159         
3160                                 x86_patch (end_jump, code);
3161                         }
3162                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3163                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3164
3165                         if (ins->dreg != X86_EAX) 
3166                                 x86_pop_reg (code, X86_EAX);
3167                         break;
3168                 case OP_FBEQ:
3169                         if (cfg->opt & MONO_OPT_FCMOV) {
3170                                 guchar *jump = code;
3171                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3172                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3173                                 x86_patch (jump, code);
3174                                 break;
3175                         }
3176                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3177                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3178                         break;
3179                 case OP_FBNE_UN:
3180                         /* Branch if C013 != 100 */
3181                         if (cfg->opt & MONO_OPT_FCMOV) {
3182                                 /* branch if !ZF or (PF|CF) */
3183                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3184                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3185                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3186                                 break;
3187                         }
3188                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3189                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3190                         break;
3191                 case OP_FBLT:
3192                         if (cfg->opt & MONO_OPT_FCMOV) {
3193                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3194                                 break;
3195                         }
3196                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3197                         break;
3198                 case OP_FBLT_UN:
3199                         if (cfg->opt & MONO_OPT_FCMOV) {
3200                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3201                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3202                                 break;
3203                         }
3204                         if (ins->opcode == OP_FBLT_UN) {
3205                                 guchar *is_not_zero_check, *end_jump;
3206                                 is_not_zero_check = code;
3207                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3208                                 end_jump = code;
3209                                 x86_jump8 (code, 0);
3210                                 x86_patch (is_not_zero_check, code);
3211                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3212
3213                                 x86_patch (end_jump, code);
3214                         }
3215                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3216                         break;
3217                 case OP_FBGT:
3218                 case OP_FBGT_UN:
3219                         if (cfg->opt & MONO_OPT_FCMOV) {
3220                                 if (ins->opcode == OP_FBGT) {
3221                                         guchar *br1;
3222
3223                                         /* skip branch if C1=1 */
3224                                         br1 = code;
3225                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3226                                         /* branch if (C0 | C3) = 1 */
3227                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3228                                         x86_patch (br1, code);
3229                                 } else {
3230                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3231                                 }
3232                                 break;
3233                         }
3234                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3235                         if (ins->opcode == OP_FBGT_UN) {
3236                                 guchar *is_not_zero_check, *end_jump;
3237                                 is_not_zero_check = code;
3238                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3239                                 end_jump = code;
3240                                 x86_jump8 (code, 0);
3241                                 x86_patch (is_not_zero_check, code);
3242                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3243
3244                                 x86_patch (end_jump, code);
3245                         }
3246                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3247                         break;
3248                 case OP_FBGE:
3249                         /* Branch if C013 == 100 or 001 */
3250                         if (cfg->opt & MONO_OPT_FCMOV) {
3251                                 guchar *br1;
3252
3253                                 /* skip branch if C1=1 */
3254                                 br1 = code;
3255                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3256                                 /* branch if (C0 | C3) = 1 */
3257                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3258                                 x86_patch (br1, code);
3259                                 break;
3260                         }
3261                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3262                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3263                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3264                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3265                         break;
3266                 case OP_FBGE_UN:
3267                         /* Branch if C013 == 000 */
3268                         if (cfg->opt & MONO_OPT_FCMOV) {
3269                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3270                                 break;
3271                         }
3272                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3273                         break;
3274                 case OP_FBLE:
3275                         /* Branch if C013=000 or 100 */
3276                         if (cfg->opt & MONO_OPT_FCMOV) {
3277                                 guchar *br1;
3278
3279                                 /* skip branch if C1=1 */
3280                                 br1 = code;
3281                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3282                                 /* branch if C0=0 */
3283                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3284                                 x86_patch (br1, code);
3285                                 break;
3286                         }
3287                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3288                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3289                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3290                         break;
3291                 case OP_FBLE_UN:
3292                         /* Branch if C013 != 001 */
3293                         if (cfg->opt & MONO_OPT_FCMOV) {
3294                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3295                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3296                                 break;
3297                         }
3298                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3299                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3300                         break;
3301                 case OP_CKFINITE: {
3302                         x86_push_reg (code, X86_EAX);
3303                         x86_fxam (code);
3304                         x86_fnstsw (code);
3305                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3306                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3307                         x86_pop_reg (code, X86_EAX);
3308                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3309                         break;
3310                 }
3311                 case OP_TLS_GET: {
3312                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3313                         break;
3314                 }
3315                 case OP_MEMORY_BARRIER: {
3316                         /* Not needed on x86 */
3317                         break;
3318                 }
3319                 case OP_ATOMIC_ADD_I4: {
3320                         int dreg = ins->dreg;
3321
3322                         if (dreg == ins->inst_basereg) {
3323                                 x86_push_reg (code, ins->sreg2);
3324                                 dreg = ins->sreg2;
3325                         } 
3326                         
3327                         if (dreg != ins->sreg2)
3328                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3329
3330                         x86_prefix (code, X86_LOCK_PREFIX);
3331                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3332
3333                         if (dreg != ins->dreg) {
3334                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3335                                 x86_pop_reg (code, dreg);
3336                         }
3337
3338                         break;
3339                 }
3340                 case OP_ATOMIC_ADD_NEW_I4: {
3341                         int dreg = ins->dreg;
3342
3343                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3344                         if (ins->sreg2 == dreg) {
3345                                 if (dreg == X86_EBX) {
3346                                         dreg = X86_EDI;
3347                                         if (ins->inst_basereg == X86_EDI)
3348                                                 dreg = X86_ESI;
3349                                 } else {
3350                                         dreg = X86_EBX;
3351                                         if (ins->inst_basereg == X86_EBX)
3352                                                 dreg = X86_EDI;
3353                                 }
3354                         } else if (ins->inst_basereg == dreg) {
3355                                 if (dreg == X86_EBX) {
3356                                         dreg = X86_EDI;
3357                                         if (ins->sreg2 == X86_EDI)
3358                                                 dreg = X86_ESI;
3359                                 } else {
3360                                         dreg = X86_EBX;
3361                                         if (ins->sreg2 == X86_EBX)
3362                                                 dreg = X86_EDI;
3363                                 }
3364                         }
3365
3366                         if (dreg != ins->dreg) {
3367                                 x86_push_reg (code, dreg);
3368                         }
3369
3370                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3371                         x86_prefix (code, X86_LOCK_PREFIX);
3372                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3373                         /* dreg contains the old value, add with sreg2 value */
3374                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3375                         
3376                         if (ins->dreg != dreg) {
3377                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3378                                 x86_pop_reg (code, dreg);
3379                         }
3380
3381                         break;
3382                 }
3383                 case OP_ATOMIC_EXCHANGE_I4: {
3384                         guchar *br[2];
3385                         int sreg2 = ins->sreg2;
3386                         int breg = ins->inst_basereg;
3387
3388                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3389                          * hack to overcome limits in x86 reg allocator 
3390                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3391                          */
3392                         if (ins->dreg != X86_EAX)
3393                                 x86_push_reg (code, X86_EAX);
3394                         
3395                         /* We need the EAX reg for the cmpxchg */
3396                         if (ins->sreg2 == X86_EAX) {
3397                                 x86_push_reg (code, X86_EDX);
3398                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3399                                 sreg2 = X86_EDX;
3400                         }
3401
3402                         if (breg == X86_EAX) {
3403                                 x86_push_reg (code, X86_ESI);
3404                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3405                                 breg = X86_ESI;
3406                         }
3407
3408                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3409
3410                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3411                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3412                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3413                         x86_patch (br [1], br [0]);
3414
3415                         if (breg != ins->inst_basereg)
3416                                 x86_pop_reg (code, X86_ESI);
3417
3418                         if (ins->dreg != X86_EAX) {
3419                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3420                                 x86_pop_reg (code, X86_EAX);
3421                         }
3422
3423                         if (ins->sreg2 != sreg2)
3424                                 x86_pop_reg (code, X86_EDX);
3425
3426                         break;
3427                 }
3428                 default:
3429                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3430                         g_assert_not_reached ();
3431                 }
3432
3433                 if ((code - cfg->native_code - offset) > max_len) {
3434                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3435                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3436                         g_assert_not_reached ();
3437                 }
3438                
3439                 cpos += max_len;
3440
3441                 last_ins = ins;
3442                 last_offset = offset;
3443                 
3444                 ins = ins->next;
3445         }
3446
3447         cfg->code_len = code - cfg->native_code;
3448 }
3449
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Arch hook with an intentionally empty body: this backend registers
 * nothing here.
 */
void
mono_arch_register_lowlevel_calls (void)
{
	/* nothing to register */
}
3454
3455 void
3456 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3457 {
3458         MonoJumpInfo *patch_info;
3459         gboolean compile_aot = !run_cctors;
3460
3461         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3462                 unsigned char *ip = patch_info->ip.i + code;
3463                 const unsigned char *target;
3464
3465                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3466
3467                 if (compile_aot) {
3468                         switch (patch_info->type) {
3469                         case MONO_PATCH_INFO_BB:
3470                         case MONO_PATCH_INFO_LABEL:
3471                                 break;
3472                         default:
3473                                 /* No need to patch these */
3474                                 continue;
3475                         }
3476                 }
3477
3478                 switch (patch_info->type) {
3479                 case MONO_PATCH_INFO_IP:
3480                         *((gconstpointer *)(ip)) = target;
3481                         break;
3482                 case MONO_PATCH_INFO_CLASS_INIT: {
3483                         guint8 *code = ip;
3484                         /* Might already been changed to a nop */
3485                         x86_call_code (code, 0);
3486                         x86_patch (ip, target);
3487                         break;
3488                 }
3489                 case MONO_PATCH_INFO_ABS:
3490                 case MONO_PATCH_INFO_METHOD:
3491                 case MONO_PATCH_INFO_METHOD_JUMP:
3492                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3493                 case MONO_PATCH_INFO_BB:
3494                 case MONO_PATCH_INFO_LABEL:
3495                         x86_patch (ip, target);
3496                         break;
3497                 case MONO_PATCH_INFO_NONE:
3498                         break;
3499                 default: {
3500                         guint32 offset = mono_arch_get_patch_offset (ip);
3501                         *((gconstpointer *)(ip + offset)) = target;
3502                         break;
3503                 }
3504                 }
3505         }
3506 }
3507
3508 guint8 *
3509 mono_arch_emit_prolog (MonoCompile *cfg)
3510 {
3511         MonoMethod *method = cfg->method;
3512         MonoBasicBlock *bb;
3513         MonoMethodSignature *sig;
3514         MonoInst *inst;
3515         int alloc_size, pos, max_offset, i;
3516         guint8 *code;
3517
3518         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3519         code = cfg->native_code = g_malloc (cfg->code_size);
3520
3521         x86_push_reg (code, X86_EBP);
3522         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3523
3524         alloc_size = cfg->stack_offset;
3525         pos = 0;
3526
3527         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3528                 /* Might need to attach the thread to the JIT */
3529                 if (lmf_tls_offset != -1) {
3530                         guint8 *buf;
3531
3532                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3533                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3534                         buf = code;
3535                         x86_branch8 (code, X86_CC_NE, 0, 0);
3536                         x86_push_imm (code, cfg->domain);
3537                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3538                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3539                         x86_patch (buf, code);
3540 #ifdef PLATFORM_WIN32
3541                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3542                         /* FIXME: Add a separate key for LMF to avoid this */
3543                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3544 #endif
3545                 } else {
3546                         g_assert (!cfg->compile_aot);
3547                         x86_push_imm (code, cfg->domain);
3548                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3549                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3550                 }
3551         }
3552
3553         if (method->save_lmf) {
3554                 pos += sizeof (MonoLMF);
3555
3556                 /* save the current IP */
3557                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3558                 x86_push_imm_template (code);
3559
3560                 /* save all caller saved regs */
3561                 x86_push_reg (code, X86_EBP);
3562                 x86_push_reg (code, X86_ESI);
3563                 x86_push_reg (code, X86_EDI);
3564                 x86_push_reg (code, X86_EBX);
3565
3566                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3567                         /*
3568                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3569                          * through the mono_lmf_addr TLS variable.
3570                          */
3571                         /* %eax = previous_lmf */
3572                         x86_prefix (code, X86_GS_PREFIX);
3573                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3574                         /* skip method_info + lmf */
3575                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3576                         /* push previous_lmf */
3577                         x86_push_reg (code, X86_EAX);
3578                         /* new lmf = ESP */
3579                         x86_prefix (code, X86_GS_PREFIX);
3580                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
3581                 } else {
3582                         /* get the address of lmf for the current thread */
3583                         /* 
3584                          * This is performance critical so we try to use some tricks to make
3585                          * it fast.
3586                          */                                                                        
3587
3588                         if (lmf_addr_tls_offset != -1) {
3589                                 /* Load lmf quicky using the GS register */
3590                                 code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
3591 #ifdef PLATFORM_WIN32
3592                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3593                                 /* FIXME: Add a separate key for LMF to avoid this */
3594                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3595 #endif
3596                         } else {
3597                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3598                         }
3599
3600                         /* Skip method info */
3601                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3602
3603                         /* push lmf */
3604                         x86_push_reg (code, X86_EAX); 
3605                         /* push *lfm (previous_lmf) */
3606                         x86_push_membase (code, X86_EAX, 0);
3607                         /* *(lmf) = ESP */
3608                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3609                 }
3610         } else {
3611
3612                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3613                         x86_push_reg (code, X86_EBX);
3614                         pos += 4;
3615                 }
3616
3617                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3618                         x86_push_reg (code, X86_EDI);
3619                         pos += 4;
3620                 }
3621
3622                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3623                         x86_push_reg (code, X86_ESI);
3624                         pos += 4;
3625                 }
3626         }
3627
3628         alloc_size -= pos;
3629
3630 #if __APPLE__
3631         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3632         {
3633                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3634                 if (tot & 4) {
3635                         tot += 4;
3636                         alloc_size += 4;
3637                 }
3638                 if (tot & 8) {
3639                         alloc_size += 8;
3640                 }
3641         }
3642 #endif
3643
3644         if (alloc_size) {
3645                 /* See mono_emit_stack_alloc */
3646 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3647                 guint32 remaining_size = alloc_size;
3648                 while (remaining_size >= 0x1000) {
3649                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3650                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3651                         remaining_size -= 0x1000;
3652                 }
3653                 if (remaining_size)
3654                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3655 #else
3656                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3657 #endif
3658         }
3659
3660 #if __APPLE_
3661         /* check the stack is aligned */
3662         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3663         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3664         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3665         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3666         x86_breakpoint (code);
3667 #endif
3668
3669         /* compute max_offset in order to use short forward jumps */
3670         max_offset = 0;
3671         if (cfg->opt & MONO_OPT_BRANCH) {
3672                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3673                         MonoInst *ins = bb->code;
3674                         bb->max_offset = max_offset;
3675
3676                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3677                                 max_offset += 6;
3678                         /* max alignment for loops */
3679                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3680                                 max_offset += LOOP_ALIGNMENT;
3681
3682                         while (ins) {
3683                                 if (ins->opcode == OP_LABEL)
3684                                         ins->inst_c1 = max_offset;
3685                                 
3686                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3687                                 ins = ins->next;
3688                         }
3689                 }
3690         }
3691
3692         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3693                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3694
3695         /* load arguments allocated to register from the stack */
3696         sig = mono_method_signature (method);
3697         pos = 0;
3698
3699         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3700                 inst = cfg->args [pos];
3701                 if (inst->opcode == OP_REGVAR) {
3702                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3703                         if (cfg->verbose_level > 2)
3704                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3705                 }
3706                 pos++;
3707         }
3708
3709         cfg->code_len = code - cfg->native_code;
3710
3711         return code;
3712 }
3713
3714 void
3715 mono_arch_emit_epilog (MonoCompile *cfg)
3716 {
3717         MonoMethod *method = cfg->method;
3718         MonoMethodSignature *sig = mono_method_signature (method);
3719         int quad, pos;
3720         guint32 stack_to_pop;
3721         guint8 *code;
3722         int max_epilog_size = 16;
3723         CallInfo *cinfo;
3724         
3725         if (cfg->method->save_lmf)
3726                 max_epilog_size += 128;
3727         
3728         if (mono_jit_trace_calls != NULL)
3729                 max_epilog_size += 50;
3730
3731         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3732                 cfg->code_size *= 2;
3733                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3734                 mono_jit_stats.code_reallocs++;
3735         }
3736
3737         code = cfg->native_code + cfg->code_len;
3738
3739         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3740                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3741
3742         /* the code restoring the registers must be kept in sync with OP_JMP */
3743         pos = 0;
3744         
3745         if (method->save_lmf) {
3746                 gint32 prev_lmf_reg;
3747                 gint32 lmf_offset = -sizeof (MonoLMF);
3748
3749                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3750                         /*
3751                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3752                          * through the mono_lmf_addr TLS variable.
3753                          */
3754                         /* reg = previous_lmf */
3755                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3756
3757                         /* lmf = previous_lmf */
3758                         x86_prefix (code, X86_GS_PREFIX);
3759                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
3760                 } else {
3761                         /* Find a spare register */
3762                         switch (sig->ret->type) {
3763                         case MONO_TYPE_I8:
3764                         case MONO_TYPE_U8:
3765                                 prev_lmf_reg = X86_EDI;
3766                                 cfg->used_int_regs |= (1 << X86_EDI);
3767                                 break;
3768                         default:
3769                                 prev_lmf_reg = X86_EDX;
3770                                 break;
3771                         }
3772
3773                         /* reg = previous_lmf */
3774                         x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3775
3776                         /* ecx = lmf */
3777                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
3778
3779                         /* *(lmf) = previous_lmf */
3780                         x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3781                 }
3782
3783                 /* restore caller saved regs */
3784                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3785                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
3786                 }
3787
3788                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3789                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
3790                 }
3791                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3792                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
3793                 }
3794
3795                 /* EBP is restored by LEAVE */
3796         } else {
3797                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3798                         pos -= 4;
3799                 }
3800                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3801                         pos -= 4;
3802                 }
3803                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3804                         pos -= 4;
3805                 }
3806
3807                 if (pos)
3808                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3809
3810                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3811                         x86_pop_reg (code, X86_ESI);
3812                 }
3813                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3814                         x86_pop_reg (code, X86_EDI);
3815                 }
3816                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3817                         x86_pop_reg (code, X86_EBX);
3818                 }
3819         }
3820
3821         /* Load returned vtypes into registers if needed */
3822         cinfo = get_call_info (cfg->mempool, sig, FALSE);
3823         if (cinfo->ret.storage == ArgValuetypeInReg) {
3824                 for (quad = 0; quad < 2; quad ++) {
3825                         switch (cinfo->ret.pair_storage [quad]) {
3826                         case ArgInIReg:
3827                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
3828                                 break;
3829                         case ArgOnFloatFpStack:
3830                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
3831                                 break;
3832                         case ArgOnDoubleFpStack:
3833                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
3834                                 break;
3835                         case ArgNone:
3836                                 break;
3837                         default:
3838                                 g_assert_not_reached ();
3839                         }
3840                 }
3841         }
3842
3843         x86_leave (code);
3844
3845         if (CALLCONV_IS_STDCALL (sig)) {
3846                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3847
3848                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3849         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
3850                 stack_to_pop = 4;
3851         else
3852                 stack_to_pop = 0;
3853
3854         if (stack_to_pop)
3855                 x86_ret_imm (code, stack_to_pop);
3856         else
3857                 x86_ret (code);
3858
3859         cfg->code_len = code - cfg->native_code;
3860
3861         g_assert (cfg->code_len < cfg->code_size);
3862 }
3863
3864 void
3865 mono_arch_emit_exceptions (MonoCompile *cfg)
3866 {
3867         MonoJumpInfo *patch_info;
3868         int nthrows, i;
3869         guint8 *code;
3870         MonoClass *exc_classes [16];
3871         guint8 *exc_throw_start [16], *exc_throw_end [16];
3872         guint32 code_size;
3873         int exc_count = 0;
3874
3875         /* Compute needed space */
3876         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3877                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3878                         exc_count++;
3879         }
3880
3881         /* 
3882          * make sure we have enough space for exceptions
3883          * 16 is the size of two push_imm instructions and a call
3884          */
3885         if (cfg->compile_aot)
3886                 code_size = exc_count * 32;
3887         else
3888                 code_size = exc_count * 16;
3889
3890         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
3891                 cfg->code_size *= 2;
3892                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3893                 mono_jit_stats.code_reallocs++;
3894         }
3895
3896         code = cfg->native_code + cfg->code_len;
3897
3898         nthrows = 0;
3899         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3900                 switch (patch_info->type) {
3901                 case MONO_PATCH_INFO_EXC: {
3902                         MonoClass *exc_class;
3903                         guint8 *buf, *buf2;
3904                         guint32 throw_ip;
3905
3906                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3907
3908                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
3909                         g_assert (exc_class);
3910                         throw_ip = patch_info->ip.i;
3911
3912                         /* Find a throw sequence for the same exception class */
3913                         for (i = 0; i < nthrows; ++i)
3914                                 if (exc_classes [i] == exc_class)
3915                                         break;
3916                         if (i < nthrows) {
3917                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
3918                                 x86_jump_code (code, exc_throw_start [i]);
3919                                 patch_info->type = MONO_PATCH_INFO_NONE;
3920                         }
3921                         else {
3922                                 guint32 size;
3923
3924                                 /* Compute size of code following the push <OFFSET> */
3925                                 size = 5 + 5;
3926
3927                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
3928                                         /* Use the shorter form */
3929                                         buf = buf2 = code;
3930                                         x86_push_imm (code, 0);
3931                                 }
3932                                 else {
3933                                         buf = code;
3934                                         x86_push_imm (code, 0xf0f0f0f0);
3935                                         buf2 = code;
3936                                 }
3937
3938                                 if (nthrows < 16) {
3939                                         exc_classes [nthrows] = exc_class;
3940                                         exc_throw_start [nthrows] = code;
3941                                 }
3942
3943                                 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
3944                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
3945                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
3946                                 patch_info->ip.i = code - cfg->native_code;
3947                                 x86_call_code (code, 0);
3948                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
3949                                 while (buf < buf2)
3950                                         x86_nop (buf);
3951
3952                                 if (nthrows < 16) {
3953                                         exc_throw_end [nthrows] = code;
3954                                         nthrows ++;
3955                                 }
3956                         }
3957                         break;
3958                 }
3959                 default:
3960                         /* do nothing */
3961                         break;
3962                 }
3963         }
3964
3965         cfg->code_len = code - cfg->native_code;
3966
3967         g_assert (cfg->code_len < cfg->code_size);
3968 }
3969
3970 void
3971 mono_arch_flush_icache (guint8 *code, gint size)
3972 {
3973         /* not needed */
3974 }
3975
/*
 * mono_arch_flush_register_windows:
 *
 *   Intentionally a no-op in this backend (presumably because the target
 * has no register windows to flush).
 */
void
mono_arch_flush_register_windows (void)
{
	/* nothing to do */
}
3980
3981 /*
3982  * Support for fast access to the thread-local lmf structure using the GS
3983  * segment register on NPTL + kernel 2.6.x.
3984  */
3985
3986 static gboolean tls_offset_inited = FALSE;
3987
3988 void
3989 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
3990 {
3991         if (!tls_offset_inited) {
3992                 if (!getenv ("MONO_NO_TLS")) {
3993 #ifdef PLATFORM_WIN32
3994                         /* 
3995                          * We need to init this multiple times, since when we are first called, the key might not
3996                          * be initialized yet.
3997                          */
3998                         appdomain_tls_offset = mono_domain_get_tls_key ();
3999                         lmf_tls_offset = mono_get_jit_tls_key ();
4000                         thread_tls_offset = mono_thread_get_tls_key ();
4001
4002                         /* Only 64 tls entries can be accessed using inline code */
4003                         if (appdomain_tls_offset >= 64)
4004                                 appdomain_tls_offset = -1;
4005                         if (lmf_tls_offset >= 64)
4006                                 lmf_tls_offset = -1;
4007                         if (thread_tls_offset >= 64)
4008                                 thread_tls_offset = -1;
4009 #else
4010 #if MONO_XEN_OPT
4011                         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
4012 #endif
4013                         tls_offset_inited = TRUE;
4014                         appdomain_tls_offset = mono_domain_get_tls_offset ();
4015                         lmf_tls_offset = mono_get_lmf_tls_offset ();
4016                         lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
4017                         thread_tls_offset = mono_thread_get_tls_offset ();
4018 #endif
4019                 }
4020         }               
4021 }
4022
4023 void
4024 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4025 {
4026 }
4027
4028 void
4029 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4030 {
4031         MonoCallInst *call = (MonoCallInst*)inst;
4032         CallInfo *cinfo = get_call_info (cfg->mempool, inst->signature, FALSE);
4033
4034         /* add the this argument */
4035         if (this_reg != -1) {
4036                 if (cinfo->args [0].storage == ArgInIReg) {
4037                         MonoInst *this;
4038                         MONO_INST_NEW (cfg, this, OP_MOVE);
4039                         this->type = this_type;
4040                         this->sreg1 = this_reg;
4041                         this->dreg = mono_regstate_next_int (cfg->rs);
4042                         mono_bblock_add_inst (cfg->cbb, this);
4043
4044                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
4045                 }
4046                 else {
4047                         MonoInst *this;
4048                         MONO_INST_NEW (cfg, this, OP_OUTARG);
4049                         this->type = this_type;
4050                         this->sreg1 = this_reg;
4051                         mono_bblock_add_inst (cfg->cbb, this);
4052                 }
4053         }
4054
4055         if (vt_reg != -1) {
4056                 MonoInst *vtarg;
4057
4058                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4059                         /*
4060                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4061                          * the stack. Save the address here, so the call instruction can
4062                          * access it.
4063                          */
4064                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4065                         vtarg->inst_destbasereg = X86_ESP;
4066                         vtarg->inst_offset = inst->stack_usage;
4067                         vtarg->sreg1 = vt_reg;
4068                         mono_bblock_add_inst (cfg->cbb, vtarg);
4069                 }
4070                 else if (cinfo->ret.storage == ArgInIReg) {
4071                         /* The return address is passed in a register */
4072                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
4073                         vtarg->sreg1 = vt_reg;
4074                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
4075                         mono_bblock_add_inst (cfg->cbb, vtarg);
4076
4077                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
4078                 } else {
4079                         MonoInst *vtarg;
4080                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4081                         vtarg->type = STACK_MP;
4082                         vtarg->sreg1 = vt_reg;
4083                         mono_bblock_add_inst (cfg->cbb, vtarg);
4084                 }
4085         }
4086 }
4087
4088 MonoInst*
4089 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4090 {
4091         MonoInst *ins = NULL;
4092
4093         if (cmethod->klass == mono_defaults.math_class) {
4094                 if (strcmp (cmethod->name, "Sin") == 0) {
4095                         MONO_INST_NEW (cfg, ins, OP_SIN);
4096                         ins->inst_i0 = args [0];
4097                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4098                         MONO_INST_NEW (cfg, ins, OP_COS);
4099                         ins->inst_i0 = args [0];
4100                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4101                         MONO_INST_NEW (cfg, ins, OP_TAN);
4102                         ins->inst_i0 = args [0];
4103                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4104                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4105                         ins->inst_i0 = args [0];
4106                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4107                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4108                         ins->inst_i0 = args [0];
4109                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4110                         MONO_INST_NEW (cfg, ins, OP_ABS);
4111                         ins->inst_i0 = args [0];
4112                 }
4113 #if 0
4114                 /* OP_FREM is not IEEE compatible */
4115                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4116                         MONO_INST_NEW (cfg, ins, OP_FREM);
4117                         ins->inst_i0 = args [0];
4118                         ins->inst_i1 = args [1];
4119                 }
4120 #endif
4121         } else if (cmethod->klass == mono_defaults.thread_class &&
4122                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
4123                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
4124         } else if(cmethod->klass->image == mono_defaults.corlib &&
4125                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4126                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4127
4128                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4129                         MonoInst *ins_iconst;
4130
4131                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4132                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4133                         ins_iconst->inst_c0 = 1;
4134
4135                         ins->inst_i0 = args [0];
4136                         ins->inst_i1 = ins_iconst;
4137                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4138                         MonoInst *ins_iconst;
4139
4140                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4141                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4142                         ins_iconst->inst_c0 = -1;
4143
4144                         ins->inst_i0 = args [0];
4145                         ins->inst_i1 = ins_iconst;
4146                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4147                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4148
4149                         ins->inst_i0 = args [0];
4150                         ins->inst_i1 = args [1];
4151                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4152                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4153
4154                         ins->inst_i0 = args [0];
4155                         ins->inst_i1 = args [1];
4156                 }
4157         }
4158
4159         return ins;
4160 }
4161
4162
4163 gboolean
4164 mono_arch_print_tree (MonoInst *tree, int arity)
4165 {
4166         return 0;
4167 }
4168
4169 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4170 {
4171         MonoInst* ins;
4172         
4173         if (appdomain_tls_offset == -1)
4174                 return NULL;
4175
4176         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4177         ins->inst_offset = appdomain_tls_offset;
4178         return ins;
4179 }
4180
4181 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4182 {
4183         MonoInst* ins;
4184
4185         if (thread_tls_offset == -1)
4186                 return NULL;
4187
4188         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4189         ins->inst_offset = thread_tls_offset;
4190         return ins;
4191 }
4192
4193 guint32
4194 mono_arch_get_patch_offset (guint8 *code)
4195 {
4196         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4197                 return 2;
4198         else if ((code [0] == 0xba))
4199                 return 1;
4200         else if ((code [0] == 0x68))
4201                 /* push IMM */
4202                 return 1;
4203         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4204                 /* push <OFFSET>(<REG>) */
4205                 return 2;
4206         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4207                 /* call *<OFFSET>(<REG>) */
4208                 return 2;
4209         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4210                 /* fldl <ADDR> */
4211                 return 2;
4212         else if ((code [0] == 0x58) && (code [1] == 0x05))
4213                 /* pop %eax; add <OFFSET>, %eax */
4214                 return 2;
4215         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4216                 /* pop <REG>; add <OFFSET>, <REG> */
4217                 return 3;
4218         else {
4219                 g_assert_not_reached ();
4220                 return -1;
4221         }
4222 }
4223
4224 gpointer*
4225 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
4226 {
4227         guint8 reg = 0;
4228         gint32 disp = 0;
4229
4230         /* go to the start of the call instruction
4231          *
4232          * address_byte = (m << 6) | (o << 3) | reg
4233          * call opcode: 0xff address_byte displacement
4234          * 0xff m=1,o=2 imm8
4235          * 0xff m=2,o=2 imm32
4236          */
4237         code -= 6;
4238
4239         /* 
4240          * A given byte sequence can match more than case here, so we have to be
4241          * really careful about the ordering of the cases. Longer sequences
4242          * come first.
4243          */
4244         if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
4245                 /*
4246                  * This is an interface call
4247                  * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
4248                  * ff 10                   call   *(%eax)
4249                  */
4250                 reg = x86_modrm_rm (code [5]);
4251                 disp = 0;
4252         } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
4253                 reg = code [4] & 0x07;
4254                 disp = (signed char)code [5];
4255         } else {
4256                 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
4257                         reg = code [1] & 0x07;
4258                         disp = *((gint32*)(code + 2));
4259                 } else if ((code [1] == 0xe8)) {
4260                         return NULL;
4261                 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
4262                         /*
4263                          * This is a interface call
4264                          * 8b 40 30   mov    0x30(%eax),%eax
4265                          * ff 10      call   *(%eax)
4266                          */
4267                         disp = 0;
4268                         reg = code [5] & 0x07;
4269                 }
4270                 else
4271                         return NULL;
4272         }
4273
4274         return (gpointer*)(((gint32)(regs [reg])) + disp);
4275 }
4276
4277 gpointer
4278 mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code)
4279 {
4280         guint32 esp = regs [X86_ESP];
4281         CallInfo *cinfo;
4282         gpointer res;
4283
4284         cinfo = get_call_info (NULL, sig, FALSE);
4285
4286         /*
4287          * The stack looks like:
4288          * <other args>
4289          * <this=delegate>
4290          * <possible vtype return address>
4291          * <return addr>
4292          * <4 pointers pushed by mono_arch_create_trampoline_code ()>
4293          */
4294         res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]);
4295         g_free (cinfo);
4296         return res;
4297 }
4298
4299 gpointer
4300 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
4301 {
4302         guint8 *code, *start;
4303         MonoDomain *domain = mono_domain_get ();
4304
4305         /* FIXME: Support more cases */
4306         if (MONO_TYPE_ISSTRUCT (sig->ret))
4307                 return NULL;
4308
4309         /*
4310          * The stack contains:
4311          * <delegate>
4312          * <return addr>
4313          */
4314
4315         if (has_target) {
4316                 mono_domain_lock (domain);
4317                 start = code = mono_code_manager_reserve (domain->code_mp, 64);
4318                 mono_domain_unlock (domain);
4319
4320                 /* Replace the this argument with the target */
4321                 x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
4322                 x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
4323                 x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
4324                 x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4325
4326                 g_assert ((code - start) < 64);
4327         } else {
4328                 if (sig->param_count == 0) {
4329                         mono_domain_lock (domain);
4330                         start = code = mono_code_manager_reserve (domain->code_mp, 32 + (sig->param_count * 8));
4331                         mono_domain_unlock (domain);
4332                 
4333                         x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
4334                         x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4335                 } else {
4336                         /* 
4337                          * The code below does not work in the presence of exceptions, since it 
4338                          * creates a new frame.
4339                          */
4340                         start = NULL;
4341 #if 0
4342                         for (i = 0; i < sig->param_count; ++i)
4343                                 if (!mono_is_regsize_var (sig->params [i]))
4344                                         return NULL;
4345
4346                         mono_domain_lock (domain);
4347                         start = code = mono_code_manager_reserve (domain->code_mp, 32 + (sig->param_count * 8));
4348                         mono_domain_unlock (domain);
4349
4350                         /* Load this == delegate */
4351                         x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
4352
4353                         /* Push arguments in opposite order, taking changes in ESP into account */
4354                         for (i = 0; i < sig->param_count; ++i)
4355                                 x86_push_membase (code, X86_ESP, 4 + (sig->param_count * 4));
4356
4357                         /* Call the delegate */
4358                         x86_call_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4359                         if (sig->param_count > 0)
4360                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, sig->param_count * 4);
4361                         x86_ret (code);
4362 #endif
4363                 }
4364         }
4365
4366         return start;
4367 }