95e39f6663efec9b6d7d44d4a4152a992831230a
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/debug-helpers.h>
20 #include <mono/metadata/threads.h>
21 #include <mono/metadata/profiler-private.h>
22 #include <mono/utils/mono-math.h>
23
24 #include "trace.h"
25 #include "mini-x86.h"
26 #include "inssel.h"
27 #include "cpu-x86.h"
28
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
/* NOTE(review): presumably selects Xen-friendly code generation — confirm at the use sites */
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#ifdef PLATFORM_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif

/* Round VAL up to the next multiple of ALIGN; ALIGN must be a power of two */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

/* Offset of the first incoming argument from EBP (saved EBP + return address) */
#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

#define NOT_IMPLEMENTED g_assert_not_reached ()
59
60 const char*
61 mono_arch_regname (int reg) {
62         switch (reg) {
63         case X86_EAX: return "%eax";
64         case X86_EBX: return "%ebx";
65         case X86_ECX: return "%ecx";
66         case X86_EDX: return "%edx";
67         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
68         case X86_EDI: return "%edi";
69         case X86_ESI: return "%esi";
70         }
71         return "unknown";
72 }
73
/*
 * x86 floating point registers live on the fp stack and have no stable
 * per-register names, so every value maps to "unknown".
 */
const char*
mono_arch_fregname (int reg) {
	return "unknown";
}
78
/* Where an argument or return value lives, as computed by get_call_info () */
typedef enum {
	ArgInIReg,		/* in a general purpose register */
	ArgInFloatSSEReg,	/* in an SSE register, as a float */
	ArgInDoubleSSEReg,	/* in an SSE register, as a double */
	ArgOnStack,		/* on the stack */
	ArgValuetypeInReg,	/* small struct passed back in registers/fp stack */
	ArgOnFloatFpStack,	/* on the x87 fp stack, as a float */
	ArgOnDoubleFpStack,	/* on the x87 fp stack, as a double */
	ArgNone			/* no storage (void return / unset entry) */
} ArgStorage;
89
/* Calling convention info for a single argument or return value */
typedef struct {
	gint16 offset;		/* stack offset, meaningful when storage == ArgOnStack */
	gint8  reg;		/* register number for the register storages */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];	/* storage of each half of the value */
	gint8 pair_regs [2];		/* register of each half when ArgInIReg */
} ArgInfo;
99
/* Calling convention info for a whole signature (built by get_call_info ()) */
typedef struct {
	int nargs;
	guint32 stack_usage;		/* total stack space used by the arguments */
	guint32 reg_usage;		/* integer parameter registers used */
	guint32 freg_usage;		/* float parameter registers used */
	gboolean need_stack_align;	/* whether call-site padding is required */
	guint32 stack_align_amount;	/* size of that padding, in bytes */
	ArgInfo ret;			/* return value */
	ArgInfo sig_cookie;		/* vararg signature cookie */
	ArgInfo args [1];		/* flexible: hasthis + param_count entries */
} CallInfo;
111
/* Number of integer registers used for argument passing: none on x86 */
#define PARAM_REGS 0

/* Number of float registers used for argument passing: none on x86 */
#define FLOAT_PARAM_REGS 0

/* Placeholder table; never indexed since PARAM_REGS is 0 */
static X86_Reg_No param_regs [] = { 0 };

#if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
/* These platforms return small structs in EAX (and EDX), see add_valuetype () */
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
122
123 static void inline
124 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
125 {
126     ainfo->offset = *stack_size;
127
128     if (*gr >= PARAM_REGS) {
129                 ainfo->storage = ArgOnStack;
130                 (*stack_size) += sizeof (gpointer);
131     }
132     else {
133                 ainfo->storage = ArgInIReg;
134                 ainfo->reg = param_regs [*gr];
135                 (*gr) ++;
136     }
137 }
138
139 static void inline
140 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
141 {
142         ainfo->offset = *stack_size;
143
144         g_assert (PARAM_REGS == 0);
145         
146         ainfo->storage = ArgOnStack;
147         (*stack_size) += sizeof (gpointer) * 2;
148 }
149
150 static void inline
151 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
152 {
153     ainfo->offset = *stack_size;
154
155     if (*gr >= FLOAT_PARAM_REGS) {
156                 ainfo->storage = ArgOnStack;
157                 (*stack_size) += is_double ? 8 : 4;
158     }
159     else {
160                 /* A double register */
161                 if (is_double)
162                         ainfo->storage = ArgInDoubleSSEReg;
163                 else
164                         ainfo->storage = ArgInFloatSSEReg;
165                 ainfo->reg = *gr;
166                 (*gr) += 1;
167     }
168 }
169
170
/*
 * add_valuetype:
 *
 *   Compute the storage for a valuetype argument or return value of
 * type TYPE. Small pinvoke return structs may come back in registers
 * or on the fp stack (SMALL_STRUCTS_IN_REGS platforms); everything
 * else is passed on the stack, word aligned.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* Native and managed layouts can differ, so pick the matching size */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef SMALL_STRUCTS_IN_REGS
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}
		/* Power-of-two sized structs come back in EAX, plus EDX for 8 bytes */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* Default: pass the valuetype on the stack, rounded up to a word */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
226
227 /*
228  * get_call_info:
229  *
230  *  Obtain information about a call according to the calling convention.
231  * For x86 ELF, see the "System V Application Binary Interface Intel386 
232  * Architecture Processor Supplment, Fourth Edition" document for more
233  * information.
234  * For x86 win32, see ???.
235  */
236 static CallInfo*
237 get_call_info (MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
238 {
239         guint32 i, gr, fr;
240         MonoType *ret_type;
241         int n = sig->hasthis + sig->param_count;
242         guint32 stack_size = 0;
243         CallInfo *cinfo;
244
245         if (mp)
246                 cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
247         else
248                 cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
249
250         gr = 0;
251         fr = 0;
252
253         /* return value */
254         {
255                 ret_type = mono_type_get_underlying_type (sig->ret);
256                 switch (ret_type->type) {
257                 case MONO_TYPE_BOOLEAN:
258                 case MONO_TYPE_I1:
259                 case MONO_TYPE_U1:
260                 case MONO_TYPE_I2:
261                 case MONO_TYPE_U2:
262                 case MONO_TYPE_CHAR:
263                 case MONO_TYPE_I4:
264                 case MONO_TYPE_U4:
265                 case MONO_TYPE_I:
266                 case MONO_TYPE_U:
267                 case MONO_TYPE_PTR:
268                 case MONO_TYPE_FNPTR:
269                 case MONO_TYPE_CLASS:
270                 case MONO_TYPE_OBJECT:
271                 case MONO_TYPE_SZARRAY:
272                 case MONO_TYPE_ARRAY:
273                 case MONO_TYPE_STRING:
274                         cinfo->ret.storage = ArgInIReg;
275                         cinfo->ret.reg = X86_EAX;
276                         break;
277                 case MONO_TYPE_U8:
278                 case MONO_TYPE_I8:
279                         cinfo->ret.storage = ArgInIReg;
280                         cinfo->ret.reg = X86_EAX;
281                         break;
282                 case MONO_TYPE_R4:
283                         cinfo->ret.storage = ArgOnFloatFpStack;
284                         break;
285                 case MONO_TYPE_R8:
286                         cinfo->ret.storage = ArgOnDoubleFpStack;
287                         break;
288                 case MONO_TYPE_GENERICINST:
289                         if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
290                                 cinfo->ret.storage = ArgInIReg;
291                                 cinfo->ret.reg = X86_EAX;
292                                 break;
293                         }
294                         /* Fall through */
295                 case MONO_TYPE_VALUETYPE: {
296                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
297
298                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
299                         if (cinfo->ret.storage == ArgOnStack)
300                                 /* The caller passes the address where the value is stored */
301                                 add_general (&gr, &stack_size, &cinfo->ret);
302                         break;
303                 }
304                 case MONO_TYPE_TYPEDBYREF:
305                         /* Same as a valuetype with size 24 */
306                         add_general (&gr, &stack_size, &cinfo->ret);
307                         ;
308                         break;
309                 case MONO_TYPE_VOID:
310                         cinfo->ret.storage = ArgNone;
311                         break;
312                 default:
313                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
314                 }
315         }
316
317         /* this */
318         if (sig->hasthis)
319                 add_general (&gr, &stack_size, cinfo->args + 0);
320
321         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
322                 gr = PARAM_REGS;
323                 fr = FLOAT_PARAM_REGS;
324                 
325                 /* Emit the signature cookie just before the implicit arguments */
326                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
327         }
328
329         for (i = 0; i < sig->param_count; ++i) {
330                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
331                 MonoType *ptype;
332
333                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
334                         /* We allways pass the sig cookie on the stack for simplicity */
335                         /* 
336                          * Prevent implicit arguments + the sig cookie from being passed 
337                          * in registers.
338                          */
339                         gr = PARAM_REGS;
340                         fr = FLOAT_PARAM_REGS;
341
342                         /* Emit the signature cookie just before the implicit arguments */
343                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
344                 }
345
346                 if (sig->params [i]->byref) {
347                         add_general (&gr, &stack_size, ainfo);
348                         continue;
349                 }
350                 ptype = mono_type_get_underlying_type (sig->params [i]);
351                 switch (ptype->type) {
352                 case MONO_TYPE_BOOLEAN:
353                 case MONO_TYPE_I1:
354                 case MONO_TYPE_U1:
355                         add_general (&gr, &stack_size, ainfo);
356                         break;
357                 case MONO_TYPE_I2:
358                 case MONO_TYPE_U2:
359                 case MONO_TYPE_CHAR:
360                         add_general (&gr, &stack_size, ainfo);
361                         break;
362                 case MONO_TYPE_I4:
363                 case MONO_TYPE_U4:
364                         add_general (&gr, &stack_size, ainfo);
365                         break;
366                 case MONO_TYPE_I:
367                 case MONO_TYPE_U:
368                 case MONO_TYPE_PTR:
369                 case MONO_TYPE_FNPTR:
370                 case MONO_TYPE_CLASS:
371                 case MONO_TYPE_OBJECT:
372                 case MONO_TYPE_STRING:
373                 case MONO_TYPE_SZARRAY:
374                 case MONO_TYPE_ARRAY:
375                         add_general (&gr, &stack_size, ainfo);
376                         break;
377                 case MONO_TYPE_GENERICINST:
378                         if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
379                                 add_general (&gr, &stack_size, ainfo);
380                                 break;
381                         }
382                         /* Fall through */
383                 case MONO_TYPE_VALUETYPE:
384                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
385                         break;
386                 case MONO_TYPE_TYPEDBYREF:
387                         stack_size += sizeof (MonoTypedRef);
388                         ainfo->storage = ArgOnStack;
389                         break;
390                 case MONO_TYPE_U8:
391                 case MONO_TYPE_I8:
392                         add_general_pair (&gr, &stack_size, ainfo);
393                         break;
394                 case MONO_TYPE_R4:
395                         add_float (&fr, &stack_size, ainfo, FALSE);
396                         break;
397                 case MONO_TYPE_R8:
398                         add_float (&fr, &stack_size, ainfo, TRUE);
399                         break;
400                 default:
401                         g_error ("unexpected type 0x%x", ptype->type);
402                         g_assert_not_reached ();
403                 }
404         }
405
406         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
407                 gr = PARAM_REGS;
408                 fr = FLOAT_PARAM_REGS;
409                 
410                 /* Emit the signature cookie just before the implicit arguments */
411                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
412         }
413
414 #if defined(__APPLE__)
415         if ((stack_size % 16) != 0) { 
416                 cinfo->need_stack_align = TRUE;
417                 stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
418         }
419 #endif
420
421         cinfo->stack_usage = stack_size;
422         cinfo->reg_usage = gr;
423         cinfo->freg_usage = fr;
424         return cinfo;
425 }
426
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enought to hold param_count + 1 entries. 
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;
	CallInfo *cinfo;

	cinfo = get_call_info (NULL, csig, FALSE);

	/* A hidden pointer to the return buffer is passed first */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* Entry 0 covers the implicit arguments (return buffer + this) */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else {
			int ialign;
			size = mono_type_stack_size (csig->params [k], &ialign);
			align = ialign;
		}

		/* ignore alignment for now */
		align = 1;

		/* pad the current offset up to this argument's alignment */
		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);	
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* k == param_count here, so this fills the last of the
	 * param_count + 1 entries with the final frame padding */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
494
/*
 * Machine code for a small cdecl thunk executing the CPUID instruction:
 *   void thunk (int id, int *p_eax, int *p_ebx, int *p_ecx, int *p_edx)
 * It is copied into executable memory at runtime (see cpuid ()).
 */
static const guchar cpuid_impl [] = {
	0x55,                		/* push   %ebp */
	0x89, 0xe5,                	/* mov    %esp,%ebp */
	0x53,                		/* push   %ebx */
	0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                	/* cpuid   */
	0x50,                		/* push   %eax */
	0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
	0x89, 0x18,                	/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
	0x89, 0x08,                	/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
	0x89, 0x10,                	/* mov    %edx,(%eax) */
	0x58,                		/* pop    %eax */
	0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
	0x89, 0x02,                	/* mov    %eax,(%edx) */
	0x5b,                		/* pop    %ebx */
	0xc9,                		/* leave   */
	0xc3,                		/* ret     */
};
515
/* Signature of the generated cpuid thunk above */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);

/*
 * cpuid:
 *
 *   Execute the CPUID instruction with function number ID, storing the
 * result registers through the four out parameters.
 * Returns 1 if CPUID is supported on this cpu, 0 otherwise (in which
 * case the out parameters are untouched).
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	/* Probe for CPUID support by toggling the ID flag (bit 21,
	 * 0x200000) in EFLAGS: if the change sticks, CPUID exists. */
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	/* Same ID-flag probe, in MSVC inline assembly syntax */
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		static CpuidFunc func = NULL;
		void *ptr;
		if (!func) {
			/* Copy the thunk into executable memory once and cache it */
			ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
			memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
			func = (CpuidFunc)ptr;
		}
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
576
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	/* Set the x87 control word precision field to double (53 bit) */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	/* MSVC: same effect via the CRT floating point control function */
	_control87 (_PC_53, MCW_PC);
#endif
}
596
597 /*
598  * This function returns the optimizations supported on this cpu.
599  */
600 guint32
601 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
602 {
603         int eax, ebx, ecx, edx;
604         guint32 opts = 0;
605         
606         *exclude_mask = 0;
607         /* Feature Flags function, flags returned in EDX. */
608         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
609                 if (edx & (1 << 15)) {
610                         opts |= MONO_OPT_CMOV;
611                         if (edx & 1)
612                                 opts |= MONO_OPT_FCMOV;
613                         else
614                                 *exclude_mask |= MONO_OPT_FCMOV;
615                 } else
616                         *exclude_mask |= MONO_OPT_CMOV;
617                 if (edx & (1 << 26))
618                         opts |= MONO_OPT_SSE2;
619                 else
620                         *exclude_mask |= MONO_OPT_SSE2;
621         }
622         return opts;
623 }
624
/*
 * Determine whenever the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* Opcode 0xf7 with modrm mod == 3 and reg field == 7 is "idiv r32" */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG: fetch the divisor from the named register */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		/* A divisor of -1 means the fault was an overflow
		 * (INT_MIN / -1), not a division by zero */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
673
/*
 * mono_arch_get_allocatable_int_vars:
 *
 *   Return the list of variables in CFG which may be allocated to an
 * integer register, sorted by mono_varlist_sort ().
 */
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		/* Dead, volatile or address-taken variables, and anything
		 * which is not a local or argument, must stay in memory */
		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we dont allocate I1 to registers because there is no simply way to sign extend 
		 * 8bit quantities in caller saved registers on x86 */
		if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
705
706 GList *
707 mono_arch_get_global_int_regs (MonoCompile *cfg)
708 {
709         GList *regs = NULL;
710
711         /* we can use 3 registers for global allocation */
712         regs = g_list_prepend (regs, (gpointer)X86_EBX);
713         regs = g_list_prepend (regs, (gpointer)X86_ESI);
714         regs = g_list_prepend (regs, (gpointer)X86_EDI);
715
716         return regs;
717 }
718
719 /*
720  * mono_arch_regalloc_cost:
721  *
722  *  Return the cost, in number of memory references, of the action of 
723  * allocating the variable VMV into a register during global register
724  * allocation.
725  */
726 guint32
727 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
728 {
729         MonoInst *ins = cfg->varinfo [vmv->idx];
730
731         if (cfg->method->save_lmf)
732                 /* The register is already saved */
733                 return (ins->opcode == OP_ARG) ? 1 : 0;
734         else
735                 /* push+pop+possible load if it is an argument */
736                 return (ins->opcode == OP_ARG) ? 3 : 2;
737 }
738  
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	/* NOTE(review): header looks unused below — confirm before removing */
	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->mempool, sig, FALSE);

	cfg->frame_reg = MONO_ARCH_BASEREG;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		/* one word per callee saved register actually used */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* round the running offset up to the strictest local alignment */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	/* locals live below EBP, hence the negated offsets */
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* The caller passed a hidden address for the return buffer */
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		break;
	case ArgValuetypeInReg:
		/* Already given a local above */
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	/* incoming arguments live above EBP at their computed offsets */
	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->args [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	/* round the frame size up to MONO_ARCH_FRAME_ALIGNMENT */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	cfg->stack_offset = offset;
}
854
855 void
856 mono_arch_create_vars (MonoCompile *cfg)
857 {
858         MonoMethodSignature *sig;
859         CallInfo *cinfo;
860
861         sig = mono_method_signature (cfg->method);
862
863         cinfo = get_call_info (cfg->mempool, sig, FALSE);
864
865         if (cinfo->ret.storage == ArgValuetypeInReg)
866                 cfg->ret_var_is_local = TRUE;
867 }
868
869 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
870  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
871  */
872
873 static void
874 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
875 {
876         MonoInst *arg;
877         MonoMethodSignature *tmp_sig;
878         MonoInst *sig_arg;
879
880         /* FIXME: Add support for signature tokens to AOT */
881         cfg->disable_aot = TRUE;
882         MONO_INST_NEW (cfg, arg, OP_OUTARG);
883
884         /*
885          * mono_ArgIterator_Setup assumes the signature cookie is 
886          * passed first and all the arguments which were before it are
887          * passed on the stack after the signature. So compensate by 
888          * passing a different signature.
889          */
890         tmp_sig = mono_metadata_signature_dup (call->signature);
891         tmp_sig->param_count -= call->signature->sentinelpos;
892         tmp_sig->sentinelpos = 0;
893         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
894
895         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
896         sig_arg->inst_p0 = tmp_sig;
897
898         arg->inst_left = sig_arg;
899         arg->type = STACK_PTR;
900         /* prepend, so they get reversed */
901         arg->next = call->out_args;
902         call->out_args = arg;
903 }
904
905 /* 
906  * take the arguments and generate the arch-specific
907  * instructions to properly call the function in call.
908  * This includes pushing, moving arguments to the right register
909  * etc.
910  */
911 MonoCallInst*
912 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
913         MonoInst *arg, *in;
914         MonoMethodSignature *sig;
915         int i, n;
916         CallInfo *cinfo;
917         int sentinelpos = 0;
918
919         sig = call->signature;
920         n = sig->param_count + sig->hasthis;
921
922         cinfo = get_call_info (cfg->mempool, sig, FALSE);
923
924         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
925                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
926
927         for (i = 0; i < n; ++i) {
928                 ArgInfo *ainfo = cinfo->args + i;
929
930                 /* Emit the signature cookie just before the implicit arguments */
931                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
932                         emit_sig_cookie (cfg, call);
933                 }
934
935                 if (is_virtual && i == 0) {
936                         /* the argument will be attached to the call instrucion */
937                         in = call->args [i];
938                 } else {
939                         MonoType *t;
940
941                         if (i >= sig->hasthis)
942                                 t = sig->params [i - sig->hasthis];
943                         else
944                                 t = &mono_defaults.int_class->byval_arg;
945                         t = mono_type_get_underlying_type (t);
946
947                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
948                         in = call->args [i];
949                         arg->cil_code = in->cil_code;
950                         arg->inst_left = in;
951                         arg->type = in->type;
952                         /* prepend, so they get reversed */
953                         arg->next = call->out_args;
954                         call->out_args = arg;
955
956                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
957                                 guint32 size, align;
958
959                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
960                                         size = sizeof (MonoTypedRef);
961                                         align = sizeof (gpointer);
962                                 }
963                                 else
964                                         if (sig->pinvoke)
965                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
966                                         else {
967                                                 int ialign;
968                                                 size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
969                                                 align = ialign;
970                                         }
971                                 arg->opcode = OP_OUTARG_VT;
972                                 arg->klass = in->klass;
973                                 arg->backend.is_pinvoke = sig->pinvoke;
974                                 arg->inst_imm = size; 
975                         }
976                         else {
977                                 switch (ainfo->storage) {
978                                 case ArgOnStack:
979                                         arg->opcode = OP_OUTARG;
980                                         if (!t->byref) {
981                                                 if (t->type == MONO_TYPE_R4)
982                                                         arg->opcode = OP_OUTARG_R4;
983                                                 else
984                                                         if (t->type == MONO_TYPE_R8)
985                                                                 arg->opcode = OP_OUTARG_R8;
986                                         }
987                                         break;
988                                 default:
989                                         g_assert_not_reached ();
990                                 }
991                         }
992                 }
993         }
994
995         /* Handle the case where there are no implicit arguments */
996         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
997                 emit_sig_cookie (cfg, call);
998         }
999
1000         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1001                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1002                         MonoInst *zero_inst;
1003                         /*
1004                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1005                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1006                          * before calling the function. So we add a dummy instruction to represent pushing the 
1007                          * struct return address to the stack. The return address will be saved to this stack slot 
1008                          * by the code emitted in this_vret_args.
1009                          */
1010                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1011                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1012                         zero_inst->inst_p0 = 0;
1013                         arg->inst_left = zero_inst;
1014                         arg->type = STACK_PTR;
1015                         /* prepend, so they get reversed */
1016                         arg->next = call->out_args;
1017                         call->out_args = arg;
1018                 }
1019                 else
1020                         /* if the function returns a struct, the called method already does a ret $0x4 */
1021                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1022                                 cinfo->stack_usage -= 4;
1023         }
1024         
1025         call->stack_usage = cinfo->stack_usage;
1026
1027 #if defined(__APPLE__)
1028         if (cinfo->need_stack_align) {
1029                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1030                 arg->inst_c0 = cinfo->stack_align_amount;
1031                 arg->next = call->out_args;
1032                 call->out_args = arg;
1033         }
1034 #endif 
1035
1036         return call;
1037 }
1038
1039 /*
1040  * Allow tracing to work with this interface (with an optional argument)
1041  */
1042 void*
1043 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1044 {
1045         guchar *code = p;
1046
1047 #if __APPLE__
1048         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1049 #endif
1050
1051         /* if some args are passed in registers, we need to save them here */
1052         x86_push_reg (code, X86_EBP);
1053
1054         if (cfg->compile_aot) {
1055                 x86_push_imm (code, cfg->method);
1056                 x86_mov_reg_imm (code, X86_EAX, func);
1057                 x86_call_reg (code, X86_EAX);
1058         } else {
1059                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1060                 x86_push_imm (code, cfg->method);
1061                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1062                 x86_call_code (code, 0);
1063         }
1064 #if __APPLE__
1065         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
1066 #else
1067         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1068 #endif
1069
1070         return code;
1071 }
1072
/* Return-value save strategies used by mono_arch_instrument_epilog (). */
enum {
	SAVE_NONE,	/* nothing to preserve */
	SAVE_STRUCT,	/* valuetype returned through a hidden pointer argument */
	SAVE_EAX,	/* 32 bit integer/pointer result in EAX */
	SAVE_EAX_EDX,	/* 64 bit result in the EAX:EDX pair */
	SAVE_FP		/* floating point result on the x87 stack */
};
1080
1081 void*
1082 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1083 {
1084         guchar *code = p;
1085         int arg_size = 0, save_mode = SAVE_NONE;
1086         MonoMethod *method = cfg->method;
1087         
1088         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1089         case MONO_TYPE_VOID:
1090                 /* special case string .ctor icall */
1091                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1092                         save_mode = SAVE_EAX;
1093                 else
1094                         save_mode = SAVE_NONE;
1095                 break;
1096         case MONO_TYPE_I8:
1097         case MONO_TYPE_U8:
1098                 save_mode = SAVE_EAX_EDX;
1099                 break;
1100         case MONO_TYPE_R4:
1101         case MONO_TYPE_R8:
1102                 save_mode = SAVE_FP;
1103                 break;
1104         case MONO_TYPE_GENERICINST:
1105                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1106                         save_mode = SAVE_EAX;
1107                         break;
1108                 }
1109                 /* Fall through */
1110         case MONO_TYPE_VALUETYPE:
1111                 save_mode = SAVE_STRUCT;
1112                 break;
1113         default:
1114                 save_mode = SAVE_EAX;
1115                 break;
1116         }
1117
1118         switch (save_mode) {
1119         case SAVE_EAX_EDX:
1120                 x86_push_reg (code, X86_EDX);
1121                 x86_push_reg (code, X86_EAX);
1122                 if (enable_arguments) {
1123                         x86_push_reg (code, X86_EDX);
1124                         x86_push_reg (code, X86_EAX);
1125                         arg_size = 8;
1126                 }
1127                 break;
1128         case SAVE_EAX:
1129                 x86_push_reg (code, X86_EAX);
1130                 if (enable_arguments) {
1131                         x86_push_reg (code, X86_EAX);
1132                         arg_size = 4;
1133                 }
1134                 break;
1135         case SAVE_FP:
1136                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1137                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1138                 if (enable_arguments) {
1139                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1140                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1141                         arg_size = 8;
1142                 }
1143                 break;
1144         case SAVE_STRUCT:
1145                 if (enable_arguments) {
1146                         x86_push_membase (code, X86_EBP, 8);
1147                         arg_size = 4;
1148                 }
1149                 break;
1150         case SAVE_NONE:
1151         default:
1152                 break;
1153         }
1154
1155         if (cfg->compile_aot) {
1156                 x86_push_imm (code, method);
1157                 x86_mov_reg_imm (code, X86_EAX, func);
1158                 x86_call_reg (code, X86_EAX);
1159         } else {
1160                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1161                 x86_push_imm (code, method);
1162                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1163                 x86_call_code (code, 0);
1164         }
1165         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1166
1167         switch (save_mode) {
1168         case SAVE_EAX_EDX:
1169                 x86_pop_reg (code, X86_EAX);
1170                 x86_pop_reg (code, X86_EDX);
1171                 break;
1172         case SAVE_EAX:
1173                 x86_pop_reg (code, X86_EAX);
1174                 break;
1175         case SAVE_FP:
1176                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1177                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1178                 break;
1179         case SAVE_NONE:
1180         default:
1181                 break;
1182         }
1183
1184         return code;
1185 }
1186
/*
 * EMIT_COND_BRANCH(ins,cond,sign):
 *   Emit a conditional branch for INS.  If the target (a label or a basic
 * block) already has native code, branch directly to it; otherwise record
 * a patch info entry and emit a branch with a zero displacement to be
 * fixed up later.  With MONO_OPT_BRANCH, the short (8 bit displacement)
 * form is used when the estimated offset fits.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1211
1212 /*  
1213  *      Emit an exception if condition is fail and
1214  *  if possible do a directly branch to target 
1215  */
1216 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
1217         do {                                                        \
1218                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
1219                 if (tins == NULL) {                                                                             \
1220                         mono_add_patch_info (cfg, code - cfg->native_code,   \
1221                                         MONO_PATCH_INFO_EXC, exc_name);  \
1222                         x86_branch32 (code, cond, 0, signed);               \
1223                 } else {        \
1224                         EMIT_COND_BRANCH (tins, cond, signed);  \
1225                 }                       \
1226         } while (0); 
1227
1228 #define EMIT_FPCOMPARE(code) do { \
1229         x86_fcompp (code); \
1230         x86_fnstsw (code); \
1231 } while (0); 
1232
1233
/*
 * emit_call:
 *
 *   Emit a call instruction with a zero displacement and record a patch
 * info entry of PATCH_TYPE/DATA at its location, so the real target
 * address can be filled in once it is known.  Returns the updated code
 * pointer.
 */
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}
1242
/*
 * INST_IGNORES_CFLAGS(opcode):
 *   TRUE when OPCODE does not consume the carry flag, i.e. it is none of
 * the add/subtract-with-carry opcodes.  Used by the peephole passes to
 * decide when a flag-clobbering replacement (like xor reg,reg) is safe.
 */
#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
1244
1245 /*
1246  * peephole_pass_1:
1247  *
1248  *   Perform peephole opts which should/can be performed before local regalloc
1249  */
1250 static void
1251 peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
1252 {
1253         MonoInst *ins, *last_ins = NULL;
1254         ins = bb->code;
1255
1256         while (ins) {
1257                 switch (ins->opcode) {
1258                 case OP_IADD_IMM:
1259                 case OP_ADD_IMM:
1260                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1261                                 /* 
1262                                  * X86_LEA is like ADD, but doesn't have the
1263                                  * sreg1==dreg restriction.
1264                                  */
1265                                 ins->opcode = OP_X86_LEA_MEMBASE;
1266                                 ins->inst_basereg = ins->sreg1;
1267                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1268                                 ins->opcode = OP_X86_INC_REG;
1269                         break;
1270                 case OP_SUB_IMM:
1271                 case OP_ISUB_IMM:
1272                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1273                                 ins->opcode = OP_X86_LEA_MEMBASE;
1274                                 ins->inst_basereg = ins->sreg1;
1275                                 ins->inst_imm = -ins->inst_imm;
1276                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1277                                 ins->opcode = OP_X86_DEC_REG;
1278                         break;
1279                 case OP_COMPARE_IMM:
1280                 case OP_ICOMPARE_IMM:
1281                         /* OP_COMPARE_IMM (reg, 0) 
1282                          * --> 
1283                          * OP_X86_TEST_NULL (reg) 
1284                          */
1285                         if (!ins->inst_imm)
1286                                 ins->opcode = OP_X86_TEST_NULL;
1287                         break;
1288                 case OP_X86_COMPARE_MEMBASE_IMM:
1289                         /* 
1290                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1291                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1292                          * -->
1293                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1294                          * OP_COMPARE_IMM reg, imm
1295                          *
1296                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1297                          */
1298                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1299                             ins->inst_basereg == last_ins->inst_destbasereg &&
1300                             ins->inst_offset == last_ins->inst_offset) {
1301                                         ins->opcode = OP_COMPARE_IMM;
1302                                         ins->sreg1 = last_ins->sreg1;
1303
1304                                         /* check if we can remove cmp reg,0 with test null */
1305                                         if (!ins->inst_imm)
1306                                                 ins->opcode = OP_X86_TEST_NULL;
1307                                 }
1308
1309                         break;
1310                 case OP_LOAD_MEMBASE:
1311                 case OP_LOADI4_MEMBASE:
1312                         /* 
1313                          * Note: if reg1 = reg2 the load op is removed
1314                          *
1315                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1316                          * OP_LOAD_MEMBASE offset(basereg), reg2
1317                          * -->
1318                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1319                          * OP_MOVE reg1, reg2
1320                          */
1321                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1322                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1323                             ins->inst_basereg == last_ins->inst_destbasereg &&
1324                             ins->inst_offset == last_ins->inst_offset) {
1325                                 if (ins->dreg == last_ins->sreg1) {
1326                                         last_ins->next = ins->next;                             
1327                                         ins = ins->next;                                
1328                                         continue;
1329                                 } else {
1330                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1331                                         ins->opcode = OP_MOVE;
1332                                         ins->sreg1 = last_ins->sreg1;
1333                                 }
1334
1335                         /* 
1336                          * Note: reg1 must be different from the basereg in the second load
1337                          * Note: if reg1 = reg2 is equal then second load is removed
1338                          *
1339                          * OP_LOAD_MEMBASE offset(basereg), reg1
1340                          * OP_LOAD_MEMBASE offset(basereg), reg2
1341                          * -->
1342                          * OP_LOAD_MEMBASE offset(basereg), reg1
1343                          * OP_MOVE reg1, reg2
1344                          */
1345                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1346                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1347                               ins->inst_basereg != last_ins->dreg &&
1348                               ins->inst_basereg == last_ins->inst_basereg &&
1349                               ins->inst_offset == last_ins->inst_offset) {
1350
1351                                 if (ins->dreg == last_ins->dreg) {
1352                                         last_ins->next = ins->next;                             
1353                                         ins = ins->next;                                
1354                                         continue;
1355                                 } else {
1356                                         ins->opcode = OP_MOVE;
1357                                         ins->sreg1 = last_ins->dreg;
1358                                 }
1359
1360                                 //g_assert_not_reached ();
1361
1362 #if 0
1363                         /* 
1364                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1365                          * OP_LOAD_MEMBASE offset(basereg), reg
1366                          * -->
1367                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1368                          * OP_ICONST reg, imm
1369                          */
1370                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1371                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1372                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1373                                    ins->inst_offset == last_ins->inst_offset) {
1374                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1375                                 ins->opcode = OP_ICONST;
1376                                 ins->inst_c0 = last_ins->inst_imm;
1377                                 g_assert_not_reached (); // check this rule
1378 #endif
1379                         }
1380                         break;
1381                 case OP_LOADU1_MEMBASE:
1382                 case OP_LOADI1_MEMBASE:
1383                         /* 
1384                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1385                          * OP_LOAD_MEMBASE offset(basereg), reg2
1386                          * -->
1387                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1388                          * CONV_I2/U2 reg1, reg2
1389                          */
1390                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1391                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1392                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1393                                         ins->inst_offset == last_ins->inst_offset) {
1394                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1395                                 ins->sreg1 = last_ins->sreg1;
1396                         }
1397                         break;
1398                 case OP_LOADU2_MEMBASE:
1399                 case OP_LOADI2_MEMBASE:
1400                         /* 
1401                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1402                          * OP_LOAD_MEMBASE offset(basereg), reg2
1403                          * -->
1404                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1405                          * CONV_I2/U2 reg1, reg2
1406                          */
1407                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1408                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1409                                         ins->inst_offset == last_ins->inst_offset) {
1410                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1411                                 ins->sreg1 = last_ins->sreg1;
1412                         }
1413                         break;
1414                 case CEE_CONV_I4:
1415                 case CEE_CONV_U4:
1416                 case OP_ICONV_TO_I4:
1417                 case OP_MOVE:
1418                         /*
1419                          * Removes:
1420                          *
1421                          * OP_MOVE reg, reg 
1422                          */
1423                         if (ins->dreg == ins->sreg1) {
1424                                 if (last_ins)
1425                                         last_ins->next = ins->next;                             
1426                                 ins = ins->next;
1427                                 continue;
1428                         }
1429                         /* 
1430                          * Removes:
1431                          *
1432                          * OP_MOVE sreg, dreg 
1433                          * OP_MOVE dreg, sreg
1434                          */
1435                         if (last_ins && last_ins->opcode == OP_MOVE &&
1436                             ins->sreg1 == last_ins->dreg &&
1437                             ins->dreg == last_ins->sreg1) {
1438                                 last_ins->next = ins->next;                             
1439                                 ins = ins->next;                                
1440                                 continue;
1441                         }
1442                         break;
1443                         
1444                 case OP_X86_PUSH_MEMBASE:
1445                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1446                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1447                             ins->inst_basereg == last_ins->inst_destbasereg &&
1448                             ins->inst_offset == last_ins->inst_offset) {
1449                                     ins->opcode = OP_X86_PUSH;
1450                                     ins->sreg1 = last_ins->sreg1;
1451                         }
1452                         break;
1453                 }
1454                 last_ins = ins;
1455                 ins = ins->next;
1456         }
1457         bb->last_ins = last_ins;
1458 }
1459
1460 static void
1461 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1462 {
1463         MonoInst *ins, *last_ins = NULL;
1464         ins = bb->code;
1465
1466         while (ins) {
1467
1468                 switch (ins->opcode) {
1469                 case OP_ICONST:
1470                         /* reg = 0 -> XOR (reg, reg) */
1471                         /* XOR sets cflags on x86, so we cant do it always */
1472                         if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
1473                                 MonoInst *ins2;
1474
1475                                 ins->opcode = OP_IXOR;
1476                                 ins->sreg1 = ins->dreg;
1477                                 ins->sreg2 = ins->dreg;
1478
1479                                 /* 
1480                                  * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
1481                                  * since it takes 3 bytes instead of 7.
1482                                  */
1483                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
1484                                         if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1485                                                 ins2->opcode = OP_STORE_MEMBASE_REG;
1486                                                 ins2->sreg1 = ins->dreg;
1487                                         }
1488                                         else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1489                                                 ins2->opcode = OP_STOREI4_MEMBASE_REG;
1490                                                 ins2->sreg1 = ins->dreg;
1491                                         }
1492                                         else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
1493                                                 /* Continue iteration */
1494                                         }
1495                                         else
1496                                                 break;
1497                                 }
1498                         }
1499                         break;
1500                 case OP_IADD_IMM:
1501                 case OP_ADD_IMM:
1502                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1503                                 ins->opcode = OP_X86_INC_REG;
1504                         break;
1505                 case OP_ISUB_IMM:
1506                 case OP_SUB_IMM:
1507                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1508                                 ins->opcode = OP_X86_DEC_REG;
1509                         break;
1510                 case OP_X86_COMPARE_MEMBASE_IMM:
1511                         /* 
1512                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1513                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1514                          * -->
1515                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1516                          * OP_COMPARE_IMM reg, imm
1517                          *
1518                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1519                          */
1520                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1521                             ins->inst_basereg == last_ins->inst_destbasereg &&
1522                             ins->inst_offset == last_ins->inst_offset) {
1523                                         ins->opcode = OP_COMPARE_IMM;
1524                                         ins->sreg1 = last_ins->sreg1;
1525
1526                                         /* check if we can remove cmp reg,0 with test null */
1527                                         if (!ins->inst_imm)
1528                                                 ins->opcode = OP_X86_TEST_NULL;
1529                                 }
1530
1531                         break;
1532                 case OP_LOAD_MEMBASE:
1533                 case OP_LOADI4_MEMBASE:
1534                         /* 
1535                          * Note: if reg1 = reg2 the load op is removed
1536                          *
1537                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1538                          * OP_LOAD_MEMBASE offset(basereg), reg2
1539                          * -->
1540                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1541                          * OP_MOVE reg1, reg2
1542                          */
1543                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1544                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1545                             ins->inst_basereg == last_ins->inst_destbasereg &&
1546                             ins->inst_offset == last_ins->inst_offset) {
1547                                 if (ins->dreg == last_ins->sreg1) {
1548                                         last_ins->next = ins->next;                             
1549                                         ins = ins->next;                                
1550                                         continue;
1551                                 } else {
1552                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1553                                         ins->opcode = OP_MOVE;
1554                                         ins->sreg1 = last_ins->sreg1;
1555                                 }
1556
1557                         /* 
1558                          * Note: reg1 must be different from the basereg in the second load
1559                          * Note: if reg1 = reg2 is equal then second load is removed
1560                          *
1561                          * OP_LOAD_MEMBASE offset(basereg), reg1
1562                          * OP_LOAD_MEMBASE offset(basereg), reg2
1563                          * -->
1564                          * OP_LOAD_MEMBASE offset(basereg), reg1
1565                          * OP_MOVE reg1, reg2
1566                          */
1567                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1568                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1569                               ins->inst_basereg != last_ins->dreg &&
1570                               ins->inst_basereg == last_ins->inst_basereg &&
1571                               ins->inst_offset == last_ins->inst_offset) {
1572
1573                                 if (ins->dreg == last_ins->dreg) {
1574                                         last_ins->next = ins->next;                             
1575                                         ins = ins->next;                                
1576                                         continue;
1577                                 } else {
1578                                         ins->opcode = OP_MOVE;
1579                                         ins->sreg1 = last_ins->dreg;
1580                                 }
1581
1582                                 //g_assert_not_reached ();
1583
1584 #if 0
1585                         /* 
1586                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1587                          * OP_LOAD_MEMBASE offset(basereg), reg
1588                          * -->
1589                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1590                          * OP_ICONST reg, imm
1591                          */
1592                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1593                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1594                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1595                                    ins->inst_offset == last_ins->inst_offset) {
1596                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1597                                 ins->opcode = OP_ICONST;
1598                                 ins->inst_c0 = last_ins->inst_imm;
1599                                 g_assert_not_reached (); // check this rule
1600 #endif
1601                         }
1602                         break;
1603                 case OP_LOADU1_MEMBASE:
1604                 case OP_LOADI1_MEMBASE:
1605                         /* 
1606                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1607                          * OP_LOAD_MEMBASE offset(basereg), reg2
1608                          * -->
1609                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1610                          * CONV_I2/U2 reg1, reg2
1611                          */
1612                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1613                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1614                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1615                                         ins->inst_offset == last_ins->inst_offset) {
1616                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1617                                 ins->sreg1 = last_ins->sreg1;
1618                         }
1619                         break;
1620                 case OP_LOADU2_MEMBASE:
1621                 case OP_LOADI2_MEMBASE:
1622                         /* 
1623                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1624                          * OP_LOAD_MEMBASE offset(basereg), reg2
1625                          * -->
1626                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1627                          * CONV_I2/U2 reg1, reg2
1628                          */
1629                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1630                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1631                                         ins->inst_offset == last_ins->inst_offset) {
1632                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1633                                 ins->sreg1 = last_ins->sreg1;
1634                         }
1635                         break;
1636                 case CEE_CONV_I4:
1637                 case CEE_CONV_U4:
1638                 case OP_ICONV_TO_I4:
1639                 case OP_MOVE:
1640                         /*
1641                          * Removes:
1642                          *
1643                          * OP_MOVE reg, reg 
1644                          */
1645                         if (ins->dreg == ins->sreg1) {
1646                                 if (last_ins)
1647                                         last_ins->next = ins->next;                             
1648                                 ins = ins->next;
1649                                 continue;
1650                         }
1651                         /* 
1652                          * Removes:
1653                          *
1654                          * OP_MOVE sreg, dreg 
1655                          * OP_MOVE dreg, sreg
1656                          */
1657                         if (last_ins && last_ins->opcode == OP_MOVE &&
1658                             ins->sreg1 == last_ins->dreg &&
1659                             ins->dreg == last_ins->sreg1) {
1660                                 last_ins->next = ins->next;                             
1661                                 ins = ins->next;                                
1662                                 continue;
1663                         }
1664                         break;
1665                 case OP_X86_PUSH_MEMBASE:
1666                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1667                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1668                             ins->inst_basereg == last_ins->inst_destbasereg &&
1669                             ins->inst_offset == last_ins->inst_offset) {
1670                                     ins->opcode = OP_X86_PUSH;
1671                                     ins->sreg1 = last_ins->sreg1;
1672                         }
1673                         break;
1674                 }
1675                 last_ins = ins;
1676                 ins = ins->next;
1677         }
1678         bb->last_ins = last_ins;
1679 }
1680
/*
 * Maps branch opcodes to X86_CC_... condition codes. The first row covers
 * the signed comparisons, the second the corresponding unsigned variants,
 * followed by the overflow/no-overflow and carry/no-carry tests.
 * NOTE(review): the exact opcode-to-index mapping is established by the
 * users of this table elsewhere in the file — verify against them before
 * reordering entries.
 */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
1687
/* Maps CMP_... constants to X86_CC_... constants. The first six entries are
 * the signed forms, the last four the unsigned forms (see the parallel
 * cc_signed_table below). */
static const int
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};
1694
/* Parallel to cc_table above: TRUE when the corresponding entry is a signed
 * comparison, FALSE for the unsigned ones. */
static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};
1700
1701 void
1702 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1703 {
1704         if (cfg->opt & MONO_OPT_PEEPHOLE)
1705                 peephole_pass_1 (cfg, bb);
1706
1707         mono_local_regalloc (cfg, bb);
1708 }
1709
/*
 * emit_float_to_int:
 *
 *   Emit code converting the value on top of the x87 FP stack to an
 * integer of SIZE bytes, leaving the (low part of the) result in DREG.
 * IS_SIGNED selects sign- vs. zero-extension for the 1- and 2-byte sizes.
 * Returns the updated native code pointer.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
	if (cfg->opt & MONO_OPT_SSE2 && size < 8) {
		/* optimize by assigning a local var for this use so we avoid
		 * the stack manipulations */
		/* Spill the x87 value to the stack, reload it into an XMM register
		 * and use cvttsd2si, which truncates without having to modify the
		 * FPU control word. */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
		x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		if (size == 1)
			x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
		else if (size == 2)
			x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
		return code;
	}
	/* x87 fallback: save the FPU control word, set the rounding-control
	 * bits (10-11, mask 0xc00) to round-toward-zero, convert with fistp,
	 * then restore the original control word below. */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
		/* NOTE(review): while the high-register pop is missing, ESP is
		 * left 4 bytes below the saved control word here, so the fldcw
		 * and the final ESP adjustment below reference the wrong slots
		 * for size == 8 — confirm this path is handled elsewhere before
		 * relying on it. */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* Restore the caller's control word and release the scratch slot. */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
1755
/*
 * mono_emit_stack_alloc:
 *
 *   Emit code for a dynamic stack allocation: subtract the byte count held
 * in TREE->sreg1 from ESP — touching the stack one page at a time on
 * platforms that grow the stack via a guard page — and zero the allocated
 * area when the MONO_INST_INIT flag is set. Returns the updated code
 * pointer.
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently commited.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		/* Sizes within one page skip the probe loop entirely. */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/* 
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);				
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		/* Decrement the remaining size and loop while >= one page. */
		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		/* Small-size fast path: a single untouched ESP adjustment. */
		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		/* Preserve the scratch registers used by rep stosl, unless they
		 * already hold the size or the result (and so are clobberable). */
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}
		
		/* Convert the byte count into a dword count for rep stosl. */
		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
				
		/* EDI = start of the allocated area, past the registers saved above. */
		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);
		
		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
1862
1863
/*
 * emit_move_return_value:
 *
 *   Emit code to move the return value of the call instruction INS into
 * its destination: scalar results are copied from EAX into ins->dreg,
 * while value types returned in registers are stored through the
 * destination address which the call sequence left on the stack.
 * Returns the updated code pointer.
 */
static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	CallInfo *cinfo;
	int quad;

	/* Move return value to the target register */
	switch (ins->opcode) {
	case CEE_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
		/* Scalar results come back in EAX. */
		if (ins->dreg != X86_EAX)
			x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
		break;
	case OP_VCALL:
	case OP_VCALL_REG:
	case OP_VCALL_MEMBASE:
		cinfo = get_call_info (cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Pop the destination address from the stack */
			x86_pop_reg (code, X86_ECX);
			
			for (quad = 0; quad < 2; quad ++) {
				switch (cinfo->ret.pair_storage [quad]) {
				case ArgInIReg:
					/* ECX holds the destination address, so it cannot
					 * also carry part of the return value. */
					g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
					x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
					break;
				case ArgNone:
					break;
				default:
					g_assert_not_reached ();
				}
			}
		}
		/* fall through */
	default:
		break;
	}

	return code;
}
1905
1906 /*
1907  * emit_tls_get:
1908  * @code: buffer to store code to
1909  * @dreg: hard register where to place the result
1910  * @tls_offset: offset info
1911  *
1912  * emit_tls_get emits in @code the native code that puts in the dreg register
1913  * the item in the thread local storage identified by tls_offset.
1914  *
1915  * Returns: a pointer to the end of the stored code
1916  */
1917 static guint8*
1918 emit_tls_get (guint8* code, int dreg, int tls_offset)
1919 {
1920 #ifdef PLATFORM_WIN32
1921         /* 
1922          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1923          * Journal and/or a disassembly of the TlsGet () function.
1924          */
1925         g_assert (tls_offset < 64);
1926         x86_prefix (code, X86_FS_PREFIX);
1927         x86_mov_reg_mem (code, dreg, 0x18, 4);
1928         /* Dunno what this does but TlsGetValue () contains it */
1929         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
1930         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
1931 #else
1932         if (optimize_for_xen) {
1933                 x86_prefix (code, X86_GS_PREFIX);
1934                 x86_mov_reg_mem (code, dreg, 0, 4);
1935                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
1936         } else {
1937                 x86_prefix (code, X86_GS_PREFIX);
1938                 x86_mov_reg_mem (code, dreg, tls_offset, 4);
1939         }
1940 #endif
1941         return code;
1942 }
1943
1944 /*
1945  * emit_load_volatile_arguments:
1946  *
1947  *  Load volatile arguments from the stack to the original input registers.
1948  * Required before a tail call.
1949  */
1950 static guint8*
1951 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
1952 {
1953         MonoMethod *method = cfg->method;
1954         MonoMethodSignature *sig;
1955         MonoInst *inst;
1956         CallInfo *cinfo;
1957         guint32 i;
1958
1959         /* FIXME: Generate intermediate code instead */
1960
1961         sig = mono_method_signature (method);
1962
1963         cinfo = get_call_info (cfg->mempool, sig, FALSE);
1964         
1965         /* This is the opposite of the code in emit_prolog */
1966
1967         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1968                 ArgInfo *ainfo = cinfo->args + i;
1969                 MonoType *arg_type;
1970                 inst = cfg->args [i];
1971
1972                 if (sig->hasthis && (i == 0))
1973                         arg_type = &mono_defaults.object_class->byval_arg;
1974                 else
1975                         arg_type = sig->params [i - sig->hasthis];
1976
1977                 /*
1978                  * On x86, the arguments are either in their original stack locations, or in
1979                  * global regs.
1980                  */
1981                 if (inst->opcode == OP_REGVAR) {
1982                         g_assert (ainfo->storage == ArgOnStack);
1983                         
1984                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
1985                 }
1986         }
1987
1988         return code;
1989 }
1990
/*
 * REAL_PRINT_REG:
 *
 *   Emit debugging code which prints "<text> <reg number> <reg value>"
 * through printf at runtime, preserving the caller-saved registers.
 * Wrapped in do { ... } while (0) so the emitted statements form a single
 * C statement and cannot be split by an unbraced if/else at the use site;
 * the trailing semicolon of the original expansion is preserved for
 * backward compatibility with existing call sites.
 */
#define REAL_PRINT_REG(text,reg) \
do { \
	mono_assert (reg >= 0); \
	x86_push_reg (code, X86_EAX); \
	x86_push_reg (code, X86_EDX); \
	x86_push_reg (code, X86_ECX); \
	x86_push_reg (code, reg); \
	x86_push_imm (code, reg); \
	x86_push_imm (code, text " %d %p\n"); \
	x86_mov_reg_imm (code, X86_EAX, printf); \
	x86_call_reg (code, X86_EAX); \
	/* pop the three printf arguments (format, reg number, reg value) */ \
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
	x86_pop_reg (code, X86_ECX); \
	x86_pop_reg (code, X86_EDX); \
	x86_pop_reg (code, X86_EAX); \
} while (0);
2005
/* benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
/* Heuristic: BB is treated as a loop header when it starts a loop body and
 * has a non-zero nesting depth. */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2009
2010 void
2011 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2012 {
2013         MonoInst *ins;
2014         MonoCallInst *call;
2015         guint offset;
2016         guint8 *code = cfg->native_code + cfg->code_len;
2017         MonoInst *last_ins = NULL;
2018         guint last_offset = 0;
2019         int max_len, cpos;
2020
2021         if (cfg->opt & MONO_OPT_PEEPHOLE)
2022                 peephole_pass (cfg, bb);
2023
2024         if (cfg->opt & MONO_OPT_LOOP) {
2025                 int pad, align = LOOP_ALIGNMENT;
2026                 /* set alignment depending on cpu */
2027                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2028                         pad = align - pad;
2029                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2030                         x86_padding (code, pad);
2031                         cfg->code_len += pad;
2032                         bb->native_offset = cfg->code_len;
2033                 }
2034         }
2035
2036         if (cfg->verbose_level > 2)
2037                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2038
2039         cpos = bb->max_offset;
2040
2041         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2042                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2043                 g_assert (!cfg->compile_aot);
2044                 cpos += 6;
2045
2046                 cov->data [bb->dfn].cil_code = bb->cil_code;
2047                 /* this is not thread save, but good enough */
2048                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2049         }
2050
2051         offset = code - cfg->native_code;
2052
2053         mono_debug_open_block (cfg, bb, offset);
2054
2055         ins = bb->code;
2056         while (ins) {
2057                 offset = code - cfg->native_code;
2058
2059                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2060
2061                 if (offset > (cfg->code_size - max_len - 16)) {
2062                         cfg->code_size *= 2;
2063                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2064                         code = cfg->native_code + offset;
2065                         mono_jit_stats.code_reallocs++;
2066                 }
2067
2068                 mono_debug_record_line_number (cfg, ins, offset);
2069
2070                 switch (ins->opcode) {
2071                 case OP_BIGMUL:
2072                         x86_mul_reg (code, ins->sreg2, TRUE);
2073                         break;
2074                 case OP_BIGMUL_UN:
2075                         x86_mul_reg (code, ins->sreg2, FALSE);
2076                         break;
2077                 case OP_X86_SETEQ_MEMBASE:
2078                 case OP_X86_SETNE_MEMBASE:
2079                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2080                                          ins->inst_basereg, ins->inst_offset, TRUE);
2081                         break;
2082                 case OP_STOREI1_MEMBASE_IMM:
2083                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2084                         break;
2085                 case OP_STOREI2_MEMBASE_IMM:
2086                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2087                         break;
2088                 case OP_STORE_MEMBASE_IMM:
2089                 case OP_STOREI4_MEMBASE_IMM:
2090                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2091                         break;
2092                 case OP_STOREI1_MEMBASE_REG:
2093                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2094                         break;
2095                 case OP_STOREI2_MEMBASE_REG:
2096                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2097                         break;
2098                 case OP_STORE_MEMBASE_REG:
2099                 case OP_STOREI4_MEMBASE_REG:
2100                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2101                         break;
2102                 case CEE_LDIND_I:
2103                 case CEE_LDIND_I4:
2104                 case CEE_LDIND_U4:
2105                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2106                         break;
2107                 case OP_LOADU4_MEM:
2108                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2109                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2110                         break;
2111                 case OP_LOAD_MEMBASE:
2112                 case OP_LOADI4_MEMBASE:
2113                 case OP_LOADU4_MEMBASE:
2114                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2115                         break;
2116                 case OP_LOADU1_MEMBASE:
2117                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2118                         break;
2119                 case OP_LOADI1_MEMBASE:
2120                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2121                         break;
2122                 case OP_LOADU2_MEMBASE:
2123                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2124                         break;
2125                 case OP_LOADI2_MEMBASE:
2126                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2127                         break;
2128                 case CEE_CONV_I1:
2129                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2130                         break;
2131                 case CEE_CONV_I2:
2132                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2133                         break;
2134                 case CEE_CONV_U1:
2135                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2136                         break;
2137                 case CEE_CONV_U2:
2138                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2139                         break;
2140                 case OP_COMPARE:
2141                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2142                         break;
2143                 case OP_COMPARE_IMM:
2144                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2145                         break;
2146                 case OP_X86_COMPARE_MEMBASE_REG:
2147                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2148                         break;
2149                 case OP_X86_COMPARE_MEMBASE_IMM:
2150                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2151                         break;
2152                 case OP_X86_COMPARE_MEMBASE8_IMM:
2153                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2154                         break;
2155                 case OP_X86_COMPARE_REG_MEMBASE:
2156                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2157                         break;
2158                 case OP_X86_COMPARE_MEM_IMM:
2159                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2160                         break;
2161                 case OP_X86_TEST_NULL:
2162                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2163                         break;
2164                 case OP_X86_ADD_MEMBASE_IMM:
2165                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2166                         break;
2167                 case OP_X86_ADD_MEMBASE:
2168                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2169                         break;
2170                 case OP_X86_SUB_MEMBASE_IMM:
2171                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2172                         break;
2173                 case OP_X86_SUB_MEMBASE:
2174                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2175                         break;
2176                 case OP_X86_AND_MEMBASE_IMM:
2177                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2178                         break;
2179                 case OP_X86_OR_MEMBASE_IMM:
2180                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2181                         break;
2182                 case OP_X86_XOR_MEMBASE_IMM:
2183                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2184                         break;
2185                 case OP_X86_INC_MEMBASE:
2186                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2187                         break;
2188                 case OP_X86_INC_REG:
2189                         x86_inc_reg (code, ins->dreg);
2190                         break;
2191                 case OP_X86_DEC_MEMBASE:
2192                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2193                         break;
2194                 case OP_X86_DEC_REG:
2195                         x86_dec_reg (code, ins->dreg);
2196                         break;
2197                 case OP_X86_MUL_MEMBASE:
2198                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2199                         break;
2200                 case OP_BREAK:
2201                         x86_breakpoint (code);
2202                         break;
2203                 case OP_ADDCC:
2204                 case CEE_ADD:
2205                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2206                         break;
2207                 case OP_ADC:
2208                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2209                         break;
2210                 case OP_ADDCC_IMM:
2211                 case OP_ADD_IMM:
2212                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2213                         break;
2214                 case OP_ADC_IMM:
2215                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2216                         break;
2217                 case OP_SUBCC:
2218                 case CEE_SUB:
2219                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2220                         break;
2221                 case OP_SBB:
2222                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2223                         break;
2224                 case OP_SUBCC_IMM:
2225                 case OP_SUB_IMM:
2226                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2227                         break;
2228                 case OP_SBB_IMM:
2229                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2230                         break;
2231                 case CEE_AND:
2232                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2233                         break;
2234                 case OP_AND_IMM:
2235                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2236                         break;
2237                 case CEE_DIV:
2238                         x86_cdq (code);
2239                         x86_div_reg (code, ins->sreg2, TRUE);
2240                         break;
2241                 case CEE_DIV_UN:
2242                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2243                         x86_div_reg (code, ins->sreg2, FALSE);
2244                         break;
2245                 case OP_DIV_IMM:
2246                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2247                         x86_cdq (code);
2248                         x86_div_reg (code, ins->sreg2, TRUE);
2249                         break;
2250                 case CEE_REM:
2251                         x86_cdq (code);
2252                         x86_div_reg (code, ins->sreg2, TRUE);
2253                         break;
2254                 case CEE_REM_UN:
2255                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2256                         x86_div_reg (code, ins->sreg2, FALSE);
2257                         break;
2258                 case OP_REM_IMM:
2259                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2260                         x86_cdq (code);
2261                         x86_div_reg (code, ins->sreg2, TRUE);
2262                         break;
2263                 case CEE_OR:
2264                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2265                         break;
2266                 case OP_OR_IMM:
2267                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2268                         break;
2269                 case CEE_XOR:
2270                 case OP_IXOR:
2271                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2272                         break;
2273                 case OP_XOR_IMM:
2274                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2275                         break;
2276                 case CEE_SHL:
2277                         g_assert (ins->sreg2 == X86_ECX);
2278                         x86_shift_reg (code, X86_SHL, ins->dreg);
2279                         break;
2280                 case CEE_SHR:
2281                         g_assert (ins->sreg2 == X86_ECX);
2282                         x86_shift_reg (code, X86_SAR, ins->dreg);
2283                         break;
2284                 case OP_SHR_IMM:
2285                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2286                         break;
2287                 case OP_SHR_UN_IMM:
2288                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2289                         break;
2290                 case CEE_SHR_UN:
2291                         g_assert (ins->sreg2 == X86_ECX);
2292                         x86_shift_reg (code, X86_SHR, ins->dreg);
2293                         break;
2294                 case OP_SHL_IMM:
2295                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2296                         break;
2297                 case OP_LSHL: {
2298                         guint8 *jump_to_end;
2299
2300                         /* handle shifts below 32 bits */
2301                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2302                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2303
2304                         x86_test_reg_imm (code, X86_ECX, 32);
2305                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2306
2307                         /* handle shift over 32 bit */
2308                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2309                         x86_clear_reg (code, ins->sreg1);
2310                         
2311                         x86_patch (jump_to_end, code);
2312                         }
2313                         break;
2314                 case OP_LSHR: {
2315                         guint8 *jump_to_end;
2316
2317                         /* handle shifts below 32 bits */
2318                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2319                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2320
2321                         x86_test_reg_imm (code, X86_ECX, 32);
2322                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2323
2324                         /* handle shifts over 31 bits */
2325                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2326                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2327                         
2328                         x86_patch (jump_to_end, code);
2329                         }
2330                         break;
2331                 case OP_LSHR_UN: {
2332                         guint8 *jump_to_end;
2333
2334                         /* handle shifts below 32 bits */
2335                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2336                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2337
2338                         x86_test_reg_imm (code, X86_ECX, 32);
2339                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2340
2341                         /* handle shifts over 31 bits */
2342                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2343                         x86_clear_reg (code, ins->backend.reg3);
2344                         
2345                         x86_patch (jump_to_end, code);
2346                         }
2347                         break;
2348                 case OP_LSHL_IMM:
2349                         if (ins->inst_imm >= 32) {
2350                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2351                                 x86_clear_reg (code, ins->sreg1);
2352                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2353                         } else {
2354                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2355                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2356                         }
2357                         break;
2358                 case OP_LSHR_IMM:
2359                         if (ins->inst_imm >= 32) {
2360                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2361                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2362                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2363                         } else {
2364                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2365                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2366                         }
2367                         break;
2368                 case OP_LSHR_UN_IMM:
2369                         if (ins->inst_imm >= 32) {
2370                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2371                                 x86_clear_reg (code, ins->backend.reg3);
2372                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2373                         } else {
2374                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2375                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2376                         }
2377                         break;
2378                 case CEE_NOT:
2379                         x86_not_reg (code, ins->sreg1);
2380                         break;
2381                 case CEE_NEG:
2382                         x86_neg_reg (code, ins->sreg1);
2383                         break;
2384                 case OP_SEXT_I1:
2385                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2386                         break;
2387                 case OP_SEXT_I2:
2388                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2389                         break;
2390                 case CEE_MUL:
2391                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2392                         break;
2393                 case OP_MUL_IMM:
2394                         switch (ins->inst_imm) {
2395                         case 2:
2396                                 /* MOV r1, r2 */
2397                                 /* ADD r1, r1 */
2398                                 if (ins->dreg != ins->sreg1)
2399                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2400                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2401                                 break;
2402                         case 3:
2403                                 /* LEA r1, [r2 + r2*2] */
2404                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2405                                 break;
2406                         case 5:
2407                                 /* LEA r1, [r2 + r2*4] */
2408                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2409                                 break;
2410                         case 6:
2411                                 /* LEA r1, [r2 + r2*2] */
2412                                 /* ADD r1, r1          */
2413                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2414                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2415                                 break;
2416                         case 9:
2417                                 /* LEA r1, [r2 + r2*8] */
2418                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2419                                 break;
2420                         case 10:
2421                                 /* LEA r1, [r2 + r2*4] */
2422                                 /* ADD r1, r1          */
2423                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2424                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2425                                 break;
2426                         case 12:
2427                                 /* LEA r1, [r2 + r2*2] */
2428                                 /* SHL r1, 2           */
2429                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2430                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2431                                 break;
2432                         case 25:
2433                                 /* LEA r1, [r2 + r2*4] */
2434                                 /* LEA r1, [r1 + r1*4] */
2435                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2436                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2437                                 break;
2438                         case 100:
2439                                 /* LEA r1, [r2 + r2*4] */
2440                                 /* SHL r1, 2           */
2441                                 /* LEA r1, [r1 + r1*4] */
2442                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2443                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2444                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2445                                 break;
2446                         default:
2447                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2448                                 break;
2449                         }
2450                         break;
2451                 case CEE_MUL_OVF:
2452                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2453                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2454                         break;
2455                 case CEE_MUL_OVF_UN: {
2456                         /* the mul operation and the exception check should most likely be split */
2457                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2458                         /*g_assert (ins->sreg2 == X86_EAX);
2459                         g_assert (ins->dreg == X86_EAX);*/
2460                         if (ins->sreg2 == X86_EAX) {
2461                                 non_eax_reg = ins->sreg1;
2462                         } else if (ins->sreg1 == X86_EAX) {
2463                                 non_eax_reg = ins->sreg2;
2464                         } else {
2465                                 /* no need to save since we're going to store to it anyway */
2466                                 if (ins->dreg != X86_EAX) {
2467                                         saved_eax = TRUE;
2468                                         x86_push_reg (code, X86_EAX);
2469                                 }
2470                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2471                                 non_eax_reg = ins->sreg2;
2472                         }
2473                         if (ins->dreg == X86_EDX) {
2474                                 if (!saved_eax) {
2475                                         saved_eax = TRUE;
2476                                         x86_push_reg (code, X86_EAX);
2477                                 }
2478                         } else if (ins->dreg != X86_EAX) {
2479                                 saved_edx = TRUE;
2480                                 x86_push_reg (code, X86_EDX);
2481                         }
2482                         x86_mul_reg (code, non_eax_reg, FALSE);
2483                         /* save before the check since pop and mov don't change the flags */
2484                         if (ins->dreg != X86_EAX)
2485                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2486                         if (saved_edx)
2487                                 x86_pop_reg (code, X86_EDX);
2488                         if (saved_eax)
2489                                 x86_pop_reg (code, X86_EAX);
2490                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2491                         break;
2492                 }
2493                 case OP_ICONST:
2494                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2495                         break;
2496                 case OP_AOTCONST:
2497                         g_assert_not_reached ();
2498                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2499                         x86_mov_reg_imm (code, ins->dreg, 0);
2500                         break;
2501                 case OP_LOAD_GOTADDR:
2502                         x86_call_imm (code, 0);
2503                         /* 
2504                          * The patch needs to point to the pop, since the GOT offset needs 
2505                          * to be added to that address.
2506                          */
2507                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2508                         x86_pop_reg (code, ins->dreg);
2509                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2510                         break;
2511                 case OP_GOT_ENTRY:
2512                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2513                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2514                         break;
2515                 case OP_X86_PUSH_GOT_ENTRY:
2516                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2517                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2518                         break;
2519                 case CEE_CONV_I4:
2520                 case OP_MOVE:
2521                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2522                         break;
2523                 case CEE_CONV_U4:
2524                         g_assert_not_reached ();
2525                 case OP_JMP: {
2526                         /*
2527                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2528                          * Keep in sync with the code in emit_epilog.
2529                          */
2530                         int pos = 0;
2531
2532                         /* FIXME: no tracing support... */
2533                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2534                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2535                         /* reset offset to make max_len work */
2536                         offset = code - cfg->native_code;
2537
2538                         g_assert (!cfg->method->save_lmf);
2539
2540                         code = emit_load_volatile_arguments (cfg, code);
2541
2542                         if (cfg->used_int_regs & (1 << X86_EBX))
2543                                 pos -= 4;
2544                         if (cfg->used_int_regs & (1 << X86_EDI))
2545                                 pos -= 4;
2546                         if (cfg->used_int_regs & (1 << X86_ESI))
2547                                 pos -= 4;
2548                         if (pos)
2549                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2550         
2551                         if (cfg->used_int_regs & (1 << X86_ESI))
2552                                 x86_pop_reg (code, X86_ESI);
2553                         if (cfg->used_int_regs & (1 << X86_EDI))
2554                                 x86_pop_reg (code, X86_EDI);
2555                         if (cfg->used_int_regs & (1 << X86_EBX))
2556                                 x86_pop_reg (code, X86_EBX);
2557         
2558                         /* restore ESP/EBP */
2559                         x86_leave (code);
2560                         offset = code - cfg->native_code;
2561                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2562                         x86_jump32 (code, 0);
2563                         break;
2564                 }
2565                 case OP_CHECK_THIS:
2566                         /* ensure ins->sreg1 is not NULL
2567                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2568                          * cmp DWORD PTR [eax], 0
2569                          */
2570                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2571                         break;
2572                 case OP_ARGLIST: {
2573                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2574                         x86_push_reg (code, hreg);
2575                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2576                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2577                         x86_pop_reg (code, hreg);
2578                         break;
2579                 }
2580                 case OP_FCALL:
2581                 case OP_LCALL:
2582                 case OP_VCALL:
2583                 case OP_VOIDCALL:
2584                 case CEE_CALL:
2585                         call = (MonoCallInst*)ins;
2586                         if (ins->flags & MONO_INST_HAS_METHOD)
2587                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2588                         else
2589                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2590                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2591                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2592                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2593                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2594                                  * smart enough to do that optimization yet
2595                                  *
2596                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2597                                  * mcs botstrap slow down. However, doing 1 pop for 4 bytes creates a small,
2598                                  * (most likely from locality benefits). People with other processors should
2599                                  * check on theirs to see what happens.
2600                                  */
2601                                 if (call->stack_usage == 4) {
2602                                         /* we want to use registers that won't get used soon, so use
2603                                          * ecx, as eax will get allocated first. edx is used by long calls,
2604                                          * so we can't use that.
2605                                          */
2606                                         
2607                                         x86_pop_reg (code, X86_ECX);
2608                                 } else {
2609                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2610                                 }
2611                         }
2612                         code = emit_move_return_value (cfg, ins, code);
2613                         break;
2614                 case OP_FCALL_REG:
2615                 case OP_LCALL_REG:
2616                 case OP_VCALL_REG:
2617                 case OP_VOIDCALL_REG:
2618                 case OP_CALL_REG:
2619                         call = (MonoCallInst*)ins;
2620                         x86_call_reg (code, ins->sreg1);
2621                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2622                                 if (call->stack_usage == 4)
2623                                         x86_pop_reg (code, X86_ECX);
2624                                 else
2625                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2626                         }
2627                         code = emit_move_return_value (cfg, ins, code);
2628                         break;
2629                 case OP_FCALL_MEMBASE:
2630                 case OP_LCALL_MEMBASE:
2631                 case OP_VCALL_MEMBASE:
2632                 case OP_VOIDCALL_MEMBASE:
2633                 case OP_CALL_MEMBASE:
2634                         call = (MonoCallInst*)ins;
2635                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2636                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2637                                 if (call->stack_usage == 4)
2638                                         x86_pop_reg (code, X86_ECX);
2639                                 else
2640                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2641                         }
2642                         code = emit_move_return_value (cfg, ins, code);
2643                         break;
2644                 case OP_OUTARG:
2645                 case OP_X86_PUSH:
2646                         x86_push_reg (code, ins->sreg1);
2647                         break;
2648                 case OP_X86_PUSH_IMM:
2649                         x86_push_imm (code, ins->inst_imm);
2650                         break;
2651                 case OP_X86_PUSH_MEMBASE:
2652                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2653                         break;
2654                 case OP_X86_PUSH_OBJ: 
2655                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2656                         x86_push_reg (code, X86_EDI);
2657                         x86_push_reg (code, X86_ESI);
2658                         x86_push_reg (code, X86_ECX);
2659                         if (ins->inst_offset)
2660                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2661                         else
2662                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2663                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2664                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2665                         x86_cld (code);
2666                         x86_prefix (code, X86_REP_PREFIX);
2667                         x86_movsd (code);
2668                         x86_pop_reg (code, X86_ECX);
2669                         x86_pop_reg (code, X86_ESI);
2670                         x86_pop_reg (code, X86_EDI);
2671                         break;
2672                 case OP_X86_LEA:
2673                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2674                         break;
2675                 case OP_X86_LEA_MEMBASE:
2676                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2677                         break;
2678                 case OP_X86_XCHG:
2679                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2680                         break;
2681                 case OP_LOCALLOC:
2682                         /* keep alignment */
2683                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2684                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2685                         code = mono_emit_stack_alloc (code, ins);
2686                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2687                         break;
2688                 case CEE_RET:
2689                         x86_ret (code);
2690                         break;
2691                 case OP_THROW: {
2692                         x86_push_reg (code, ins->sreg1);
2693                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2694                                                           (gpointer)"mono_arch_throw_exception");
2695                         break;
2696                 }
2697                 case OP_RETHROW: {
2698                         x86_push_reg (code, ins->sreg1);
2699                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2700                                                           (gpointer)"mono_arch_rethrow_exception");
2701                         break;
2702                 }
2703                 case OP_CALL_HANDLER: 
2704                         /* Align stack */
2705 #ifdef __APPLE__
2706                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2707 #endif
2708                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2709                         x86_call_imm (code, 0);
2710 #ifdef __APPLE__
2711                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2712 #endif
2713                         break;
2714                 case OP_LABEL:
2715                         ins->inst_c0 = code - cfg->native_code;
2716                         break;
2717                 case OP_BR:
2718                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2719                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2720                         //break;
2721                         if (ins->flags & MONO_INST_BRLABEL) {
2722                                 if (ins->inst_i0->inst_c0) {
2723                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2724                                 } else {
2725                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2726                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2727                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2728                                                 x86_jump8 (code, 0);
2729                                         else 
2730                                                 x86_jump32 (code, 0);
2731                                 }
2732                         } else {
2733                                 if (ins->inst_target_bb->native_offset) {
2734                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2735                                 } else {
2736                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2737                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2738                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2739                                                 x86_jump8 (code, 0);
2740                                         else 
2741                                                 x86_jump32 (code, 0);
2742                                 } 
2743                         }
2744                         break;
2745                 case OP_BR_REG:
2746                         x86_jump_reg (code, ins->sreg1);
2747                         break;
2748                 case OP_CEQ:
2749                 case OP_CLT:
2750                 case OP_CLT_UN:
2751                 case OP_CGT:
2752                 case OP_CGT_UN:
2753                 case OP_CNE:
2754                         x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2755                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2756                         break;
2757                 case OP_COND_EXC_EQ:
2758                 case OP_COND_EXC_NE_UN:
2759                 case OP_COND_EXC_LT:
2760                 case OP_COND_EXC_LT_UN:
2761                 case OP_COND_EXC_GT:
2762                 case OP_COND_EXC_GT_UN:
2763                 case OP_COND_EXC_GE:
2764                 case OP_COND_EXC_GE_UN:
2765                 case OP_COND_EXC_LE:
2766                 case OP_COND_EXC_LE_UN:
2767                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
2768                         break;
2769                 case OP_COND_EXC_OV:
2770                 case OP_COND_EXC_NO:
2771                 case OP_COND_EXC_C:
2772                 case OP_COND_EXC_NC:
2773                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2774                         break;
2775                 case CEE_BEQ:
2776                 case CEE_BNE_UN:
2777                 case CEE_BLT:
2778                 case CEE_BLT_UN:
2779                 case CEE_BGT:
2780                 case CEE_BGT_UN:
2781                 case CEE_BGE:
2782                 case CEE_BGE_UN:
2783                 case CEE_BLE:
2784                 case CEE_BLE_UN:
2785                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2786                         break;
2787
2788                 /* floating point opcodes */
2789                 case OP_R8CONST: {
2790                         double d = *(double *)ins->inst_p0;
2791
2792                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2793                                 x86_fldz (code);
2794                         } else if (d == 1.0) {
2795                                 x86_fld1 (code);
2796                         } else {
2797                                 if (cfg->compile_aot) {
2798                                         guint32 *val = (guint32*)&d;
2799                                         x86_push_imm (code, val [1]);
2800                                         x86_push_imm (code, val [0]);
2801                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2802                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2803                                 }
2804                                 else {
2805                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2806                                         x86_fld (code, NULL, TRUE);
2807                                 }
2808                         }
2809                         break;
2810                 }
2811                 case OP_R4CONST: {
2812                         float f = *(float *)ins->inst_p0;
2813
2814                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2815                                 x86_fldz (code);
2816                         } else if (f == 1.0) {
2817                                 x86_fld1 (code);
2818                         } else {
2819                                 if (cfg->compile_aot) {
2820                                         guint32 val = *(guint32*)&f;
2821                                         x86_push_imm (code, val);
2822                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2823                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2824                                 }
2825                                 else {
2826                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2827                                         x86_fld (code, NULL, FALSE);
2828                                 }
2829                         }
2830                         break;
2831                 }
2832                 case OP_STORER8_MEMBASE_REG:
2833                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2834                         break;
2835                 case OP_LOADR8_SPILL_MEMBASE:
2836                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2837                         x86_fxch (code, 1);
2838                         break;
2839                 case OP_LOADR8_MEMBASE:
2840                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2841                         break;
2842                 case OP_STORER4_MEMBASE_REG:
2843                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2844                         break;
2845                 case OP_LOADR4_MEMBASE:
2846                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2847                         break;
2848                 case CEE_CONV_R4: /* FIXME: change precision */
2849                 case CEE_CONV_R8:
2850                         x86_push_reg (code, ins->sreg1);
2851                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2852                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2853                         break;
2854                 case OP_X86_FP_LOAD_I8:
2855                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2856                         break;
2857                 case OP_X86_FP_LOAD_I4:
2858                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2859                         break;
2860                 case OP_FCONV_TO_I1:
2861                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2862                         break;
2863                 case OP_FCONV_TO_U1:
2864                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2865                         break;
2866                 case OP_FCONV_TO_I2:
2867                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2868                         break;
2869                 case OP_FCONV_TO_U2:
2870                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2871                         break;
2872                 case OP_FCONV_TO_I4:
2873                 case OP_FCONV_TO_I:
2874                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2875                         break;
2876                 case OP_FCONV_TO_I8:
2877                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2878                         x86_fnstcw_membase(code, X86_ESP, 0);
2879                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2880                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2881                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2882                         x86_fldcw_membase (code, X86_ESP, 2);
2883                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2884                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2885                         x86_pop_reg (code, ins->dreg);
2886                         x86_pop_reg (code, ins->backend.reg3);
2887                         x86_fldcw_membase (code, X86_ESP, 0);
2888                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2889                         break;
2890                 case OP_LCONV_TO_R_UN: { 
2891                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2892                         guint8 *br;
2893
2894                         /* load 64bit integer to FP stack */
2895                         x86_push_imm (code, 0);
2896                         x86_push_reg (code, ins->sreg2);
2897                         x86_push_reg (code, ins->sreg1);
2898                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2899                         /* store as 80bit FP value */
2900                         x86_fst80_membase (code, X86_ESP, 0);
2901                         
2902                         /* test if lreg is negative */
2903                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2904                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2905         
2906                         /* add correction constant mn */
2907                         x86_fld80_mem (code, mn);
2908                         x86_fld80_membase (code, X86_ESP, 0);
2909                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2910                         x86_fst80_membase (code, X86_ESP, 0);
2911
2912                         x86_patch (br, code);
2913
2914                         x86_fld80_membase (code, X86_ESP, 0);
2915                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2916
2917                         break;
2918                 }
2919                 case OP_LCONV_TO_OVF_I: {
2920                         guint8 *br [3], *label [1];
2921                         MonoInst *tins;
2922
2923                         /* 
2924                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2925                          */
2926                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2927
2928                         /* If the low word top bit is set, see if we are negative */
2929                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2930                         /* We are not negative (no top bit set), check that our top word is zero */
2931                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2932                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2933                         label [0] = code;
2934
2935                         /* throw exception */
2936                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2937                         if (tins) {
2938                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2939                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2940                                         x86_jump8 (code, 0);
2941                                 else
2942                                         x86_jump32 (code, 0);
2943                         } else {
2944                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2945                                 x86_jump32 (code, 0);
2946                         }
2947         
2948         
2949                         x86_patch (br [0], code);
2950                         /* our top bit is set, check that top word is 0xffffffff */
2951                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2952                 
2953                         x86_patch (br [1], code);
2954                         /* nope, emit exception */
2955                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2956                         x86_patch (br [2], label [0]);
2957
2958                         if (ins->dreg != ins->sreg1)
2959                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2960                         break;
2961                 }
2962                 case OP_FADD:
2963                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2964                         break;
2965                 case OP_FSUB:
2966                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2967                         break;          
2968                 case OP_FMUL:
2969                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2970                         break;          
2971                 case OP_FDIV:
2972                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2973                         break;          
2974                 case OP_FNEG:
2975                         x86_fchs (code);
2976                         break;          
2977                 case OP_SIN:
2978                         x86_fsin (code);
2979                         x86_fldz (code);
2980                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2981                         break;          
2982                 case OP_COS:
2983                         x86_fcos (code);
2984                         x86_fldz (code);
2985                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2986                         break;          
2987                 case OP_ABS:
2988                         x86_fabs (code);
2989                         break;          
2990                 case OP_TAN: {
2991                         /* 
2992                          * it really doesn't make sense to inline all this code,
2993                          * it's here just to show that things may not be as simple 
2994                          * as they appear.
2995                          */
2996                         guchar *check_pos, *end_tan, *pop_jump;
2997                         x86_push_reg (code, X86_EAX);
2998                         x86_fptan (code);
2999                         x86_fnstsw (code);
3000                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3001                         check_pos = code;
3002                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3003                         x86_fstp (code, 0); /* pop the 1.0 */
3004                         end_tan = code;
3005                         x86_jump8 (code, 0);
3006                         x86_fldpi (code);
3007                         x86_fp_op (code, X86_FADD, 0);
3008                         x86_fxch (code, 1);
3009                         x86_fprem1 (code);
3010                         x86_fstsw (code);
3011                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3012                         pop_jump = code;
3013                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3014                         x86_fstp (code, 1);
3015                         x86_fptan (code);
3016                         x86_patch (pop_jump, code);
3017                         x86_fstp (code, 0); /* pop the 1.0 */
3018                         x86_patch (check_pos, code);
3019                         x86_patch (end_tan, code);
3020                         x86_fldz (code);
3021                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3022                         x86_pop_reg (code, X86_EAX);
3023                         break;
3024                 }
3025                 case OP_ATAN:
3026                         x86_fld1 (code);
3027                         x86_fpatan (code);
3028                         x86_fldz (code);
3029                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3030                         break;          
3031                 case OP_SQRT:
3032                         x86_fsqrt (code);
3033                         break;          
3034                 case OP_X86_FPOP:
3035                         x86_fstp (code, 0);
3036                         break;          
3037                 case OP_FREM: {
3038                         guint8 *l1, *l2;
3039
3040                         x86_push_reg (code, X86_EAX);
3041                         /* we need to exchange ST(0) with ST(1) */
3042                         x86_fxch (code, 1);
3043
3044                         /* this requires a loop, because fprem sometimes 
3045                          * returns a partial remainder */
3046                         l1 = code;
3047                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3048                         /* x86_fprem1 (code); */
3049                         x86_fprem (code);
3050                         x86_fnstsw (code);
3051                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3052                         l2 = code + 2;
3053                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3054
3055                         /* pop result */
3056                         x86_fstp (code, 1);
3057
3058                         x86_pop_reg (code, X86_EAX);
3059                         break;
3060                 }
3061                 case OP_FCOMPARE:
3062                         if (cfg->opt & MONO_OPT_FCMOV) {
3063                                 x86_fcomip (code, 1);
3064                                 x86_fstp (code, 0);
3065                                 break;
3066                         }
3067                         /* this overwrites EAX */
3068                         EMIT_FPCOMPARE(code);
3069                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3070                         break;
3071                 case OP_FCEQ:
3072                         if (cfg->opt & MONO_OPT_FCMOV) {
3073                                 /* zeroing the register at the start results in 
3074                                  * shorter and faster code (we can also remove the widening op)
3075                                  */
3076                                 guchar *unordered_check;
3077                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3078                                 x86_fcomip (code, 1);
3079                                 x86_fstp (code, 0);
3080                                 unordered_check = code;
3081                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3082                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3083                                 x86_patch (unordered_check, code);
3084                                 break;
3085                         }
3086                         if (ins->dreg != X86_EAX) 
3087                                 x86_push_reg (code, X86_EAX);
3088
3089                         EMIT_FPCOMPARE(code);
3090                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3091                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3092                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3093                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3094
3095                         if (ins->dreg != X86_EAX) 
3096                                 x86_pop_reg (code, X86_EAX);
3097                         break;
3098                 case OP_FCLT:
3099                 case OP_FCLT_UN:
3100                         if (cfg->opt & MONO_OPT_FCMOV) {
3101                                 /* zeroing the register at the start results in 
3102                                  * shorter and faster code (we can also remove the widening op)
3103                                  */
3104                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3105                                 x86_fcomip (code, 1);
3106                                 x86_fstp (code, 0);
3107                                 if (ins->opcode == OP_FCLT_UN) {
3108                                         guchar *unordered_check = code;
3109                                         guchar *jump_to_end;
3110                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3111                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3112                                         jump_to_end = code;
3113                                         x86_jump8 (code, 0);
3114                                         x86_patch (unordered_check, code);
3115                                         x86_inc_reg (code, ins->dreg);
3116                                         x86_patch (jump_to_end, code);
3117                                 } else {
3118                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3119                                 }
3120                                 break;
3121                         }
3122                         if (ins->dreg != X86_EAX) 
3123                                 x86_push_reg (code, X86_EAX);
3124
3125                         EMIT_FPCOMPARE(code);
3126                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3127                         if (ins->opcode == OP_FCLT_UN) {
3128                                 guchar *is_not_zero_check, *end_jump;
3129                                 is_not_zero_check = code;
3130                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3131                                 end_jump = code;
3132                                 x86_jump8 (code, 0);
3133                                 x86_patch (is_not_zero_check, code);
3134                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3135
3136                                 x86_patch (end_jump, code);
3137                         }
3138                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3139                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3140
3141                         if (ins->dreg != X86_EAX) 
3142                                 x86_pop_reg (code, X86_EAX);
3143                         break;
3144                 case OP_FCGT:
3145                 case OP_FCGT_UN:
3146                         if (cfg->opt & MONO_OPT_FCMOV) {
3147                                 /* zeroing the register at the start results in 
3148                                  * shorter and faster code (we can also remove the widening op)
3149                                  */
3150                                 guchar *unordered_check;
3151                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3152                                 x86_fcomip (code, 1);
3153                                 x86_fstp (code, 0);
3154                                 if (ins->opcode == OP_FCGT) {
3155                                         unordered_check = code;
3156                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3157                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3158                                         x86_patch (unordered_check, code);
3159                                 } else {
3160                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3161                                 }
3162                                 break;
3163                         }
3164                         if (ins->dreg != X86_EAX) 
3165                                 x86_push_reg (code, X86_EAX);
3166
3167                         EMIT_FPCOMPARE(code);
3168                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3169                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3170                         if (ins->opcode == OP_FCGT_UN) {
3171                                 guchar *is_not_zero_check, *end_jump;
3172                                 is_not_zero_check = code;
3173                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3174                                 end_jump = code;
3175                                 x86_jump8 (code, 0);
3176                                 x86_patch (is_not_zero_check, code);
3177                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3178         
3179                                 x86_patch (end_jump, code);
3180                         }
3181                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3182                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3183
3184                         if (ins->dreg != X86_EAX) 
3185                                 x86_pop_reg (code, X86_EAX);
3186                         break;
3187                 case OP_FBEQ:
3188                         if (cfg->opt & MONO_OPT_FCMOV) {
3189                                 guchar *jump = code;
3190                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3191                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3192                                 x86_patch (jump, code);
3193                                 break;
3194                         }
3195                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3196                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3197                         break;
3198                 case OP_FBNE_UN:
3199                         /* Branch if C013 != 100 */
3200                         if (cfg->opt & MONO_OPT_FCMOV) {
3201                                 /* branch if !ZF or (PF|CF) */
3202                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3203                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3204                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3205                                 break;
3206                         }
3207                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3208                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3209                         break;
3210                 case OP_FBLT:
3211                         if (cfg->opt & MONO_OPT_FCMOV) {
3212                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3213                                 break;
3214                         }
3215                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3216                         break;
3217                 case OP_FBLT_UN:
3218                         if (cfg->opt & MONO_OPT_FCMOV) {
3219                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3220                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3221                                 break;
3222                         }
3223                         if (ins->opcode == OP_FBLT_UN) {
3224                                 guchar *is_not_zero_check, *end_jump;
3225                                 is_not_zero_check = code;
3226                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3227                                 end_jump = code;
3228                                 x86_jump8 (code, 0);
3229                                 x86_patch (is_not_zero_check, code);
3230                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3231
3232                                 x86_patch (end_jump, code);
3233                         }
3234                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3235                         break;
3236                 case OP_FBGT:
3237                 case OP_FBGT_UN:
3238                         if (cfg->opt & MONO_OPT_FCMOV) {
3239                                 if (ins->opcode == OP_FBGT) {
3240                                         guchar *br1;
3241
3242                                         /* skip branch if C1=1 */
3243                                         br1 = code;
3244                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3245                                         /* branch if (C0 | C3) = 1 */
3246                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3247                                         x86_patch (br1, code);
3248                                 } else {
3249                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3250                                 }
3251                                 break;
3252                         }
3253                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3254                         if (ins->opcode == OP_FBGT_UN) {
3255                                 guchar *is_not_zero_check, *end_jump;
3256                                 is_not_zero_check = code;
3257                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3258                                 end_jump = code;
3259                                 x86_jump8 (code, 0);
3260                                 x86_patch (is_not_zero_check, code);
3261                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3262
3263                                 x86_patch (end_jump, code);
3264                         }
3265                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3266                         break;
3267                 case OP_FBGE:
3268                         /* Branch if C013 == 100 or 001 */
3269                         if (cfg->opt & MONO_OPT_FCMOV) {
3270                                 guchar *br1;
3271
3272                                 /* skip branch if C1=1 */
3273                                 br1 = code;
3274                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3275                                 /* branch if (C0 | C3) = 1 */
3276                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3277                                 x86_patch (br1, code);
3278                                 break;
3279                         }
3280                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3281                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3282                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3283                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3284                         break;
3285                 case OP_FBGE_UN:
3286                         /* Branch if C013 == 000 */
3287                         if (cfg->opt & MONO_OPT_FCMOV) {
3288                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3289                                 break;
3290                         }
3291                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3292                         break;
3293                 case OP_FBLE:
3294                         /* Branch if C013=000 or 100 */
3295                         if (cfg->opt & MONO_OPT_FCMOV) {
3296                                 guchar *br1;
3297
3298                                 /* skip branch if C1=1 */
3299                                 br1 = code;
3300                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3301                                 /* branch if C0=0 */
3302                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3303                                 x86_patch (br1, code);
3304                                 break;
3305                         }
3306                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3307                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3308                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3309                         break;
3310                 case OP_FBLE_UN:
3311                         /* Branch if C013 != 001 */
3312                         if (cfg->opt & MONO_OPT_FCMOV) {
3313                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3314                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3315                                 break;
3316                         }
3317                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3318                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3319                         break;
3320                 case OP_CKFINITE: {
3321                         x86_push_reg (code, X86_EAX);
3322                         x86_fxam (code);
3323                         x86_fnstsw (code);
3324                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3325                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3326                         x86_pop_reg (code, X86_EAX);
3327                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3328                         break;
3329                 }
3330                 case OP_TLS_GET: {
3331                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3332                         break;
3333                 }
3334                 case OP_MEMORY_BARRIER: {
3335                         /* Not needed on x86 */
3336                         break;
3337                 }
3338                 case OP_ATOMIC_ADD_I4: {
3339                         int dreg = ins->dreg;
3340
3341                         if (dreg == ins->inst_basereg) {
3342                                 x86_push_reg (code, ins->sreg2);
3343                                 dreg = ins->sreg2;
3344                         } 
3345                         
3346                         if (dreg != ins->sreg2)
3347                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3348
3349                         x86_prefix (code, X86_LOCK_PREFIX);
3350                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3351
3352                         if (dreg != ins->dreg) {
3353                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3354                                 x86_pop_reg (code, dreg);
3355                         }
3356
3357                         break;
3358                 }
3359                 case OP_ATOMIC_ADD_NEW_I4: {
3360                         int dreg = ins->dreg;
3361
3362                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3363                         if (ins->sreg2 == dreg) {
3364                                 if (dreg == X86_EBX) {
3365                                         dreg = X86_EDI;
3366                                         if (ins->inst_basereg == X86_EDI)
3367                                                 dreg = X86_ESI;
3368                                 } else {
3369                                         dreg = X86_EBX;
3370                                         if (ins->inst_basereg == X86_EBX)
3371                                                 dreg = X86_EDI;
3372                                 }
3373                         } else if (ins->inst_basereg == dreg) {
3374                                 if (dreg == X86_EBX) {
3375                                         dreg = X86_EDI;
3376                                         if (ins->sreg2 == X86_EDI)
3377                                                 dreg = X86_ESI;
3378                                 } else {
3379                                         dreg = X86_EBX;
3380                                         if (ins->sreg2 == X86_EBX)
3381                                                 dreg = X86_EDI;
3382                                 }
3383                         }
3384
3385                         if (dreg != ins->dreg) {
3386                                 x86_push_reg (code, dreg);
3387                         }
3388
3389                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3390                         x86_prefix (code, X86_LOCK_PREFIX);
3391                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3392                         /* dreg contains the old value, add with sreg2 value */
3393                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3394                         
3395                         if (ins->dreg != dreg) {
3396                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3397                                 x86_pop_reg (code, dreg);
3398                         }
3399
3400                         break;
3401                 }
3402                 case OP_ATOMIC_EXCHANGE_I4: {
3403                         guchar *br[2];
3404                         int sreg2 = ins->sreg2;
3405                         int breg = ins->inst_basereg;
3406
3407                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3408                          * hack to overcome limits in x86 reg allocator 
3409                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3410                          */
3411                         if (ins->dreg != X86_EAX)
3412                                 x86_push_reg (code, X86_EAX);
3413                         
3414                         /* We need the EAX reg for the cmpxchg */
3415                         if (ins->sreg2 == X86_EAX) {
3416                                 x86_push_reg (code, X86_EDX);
3417                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3418                                 sreg2 = X86_EDX;
3419                         }
3420
3421                         if (breg == X86_EAX) {
3422                                 x86_push_reg (code, X86_ESI);
3423                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3424                                 breg = X86_ESI;
3425                         }
3426
3427                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3428
3429                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3430                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3431                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3432                         x86_patch (br [1], br [0]);
3433
3434                         if (breg != ins->inst_basereg)
3435                                 x86_pop_reg (code, X86_ESI);
3436
3437                         if (ins->dreg != X86_EAX) {
3438                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3439                                 x86_pop_reg (code, X86_EAX);
3440                         }
3441
3442                         if (ins->sreg2 != sreg2)
3443                                 x86_pop_reg (code, X86_EDX);
3444
3445                         break;
3446                 }
3447                 default:
3448                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3449                         g_assert_not_reached ();
3450                 }
3451
3452                 if ((code - cfg->native_code - offset) > max_len) {
3453                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3454                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3455                         g_assert_not_reached ();
3456                 }
3457                
3458                 cpos += max_len;
3459
3460                 last_ins = ins;
3461                 last_offset = offset;
3462                 
3463                 ins = ins->next;
3464         }
3465
3466         cfg->code_len = code - cfg->native_code;
3467 }
3468
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Hook for registering architecture specific low-level icalls.
 * No low-level calls need registration on x86, so this is a no-op.
 */
void
mono_arch_register_lowlevel_calls (void)
{
}
3473
3474 void
3475 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3476 {
3477         MonoJumpInfo *patch_info;
3478         gboolean compile_aot = !run_cctors;
3479
3480         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3481                 unsigned char *ip = patch_info->ip.i + code;
3482                 const unsigned char *target;
3483
3484                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3485
3486                 if (compile_aot) {
3487                         switch (patch_info->type) {
3488                         case MONO_PATCH_INFO_BB:
3489                         case MONO_PATCH_INFO_LABEL:
3490                                 break;
3491                         default:
3492                                 /* No need to patch these */
3493                                 continue;
3494                         }
3495                 }
3496
3497                 switch (patch_info->type) {
3498                 case MONO_PATCH_INFO_IP:
3499                         *((gconstpointer *)(ip)) = target;
3500                         break;
3501                 case MONO_PATCH_INFO_CLASS_INIT: {
3502                         guint8 *code = ip;
3503                         /* Might already been changed to a nop */
3504                         x86_call_code (code, 0);
3505                         x86_patch (ip, target);
3506                         break;
3507                 }
3508                 case MONO_PATCH_INFO_ABS:
3509                 case MONO_PATCH_INFO_METHOD:
3510                 case MONO_PATCH_INFO_METHOD_JUMP:
3511                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3512                 case MONO_PATCH_INFO_BB:
3513                 case MONO_PATCH_INFO_LABEL:
3514                         x86_patch (ip, target);
3515                         break;
3516                 case MONO_PATCH_INFO_NONE:
3517                         break;
3518                 default: {
3519                         guint32 offset = mono_arch_get_patch_offset (ip);
3520                         *((gconstpointer *)(ip + offset)) = target;
3521                         break;
3522                 }
3523                 }
3524         }
3525 }
3526
3527 guint8 *
3528 mono_arch_emit_prolog (MonoCompile *cfg)
3529 {
3530         MonoMethod *method = cfg->method;
3531         MonoBasicBlock *bb;
3532         MonoMethodSignature *sig;
3533         MonoInst *inst;
3534         int alloc_size, pos, max_offset, i;
3535         guint8 *code;
3536
3537         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3538         code = cfg->native_code = g_malloc (cfg->code_size);
3539
3540         x86_push_reg (code, X86_EBP);
3541         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3542
3543         alloc_size = cfg->stack_offset;
3544         pos = 0;
3545
3546         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3547                 /* Might need to attach the thread to the JIT */
3548                 if (lmf_tls_offset != -1) {
3549                         guint8 *buf;
3550
3551                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3552                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3553                         buf = code;
3554                         x86_branch8 (code, X86_CC_NE, 0, 0);
3555                         x86_push_imm (code, cfg->domain);
3556                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3557                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3558                         x86_patch (buf, code);
3559 #ifdef PLATFORM_WIN32
3560                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3561                         /* FIXME: Add a separate key for LMF to avoid this */
3562                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3563 #endif
3564                 } else {
3565                         g_assert (!cfg->compile_aot);
3566                         x86_push_imm (code, cfg->domain);
3567                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3568                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3569                 }
3570         }
3571
3572         if (method->save_lmf) {
3573                 pos += sizeof (MonoLMF);
3574
3575                 /* save the current IP */
3576                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3577                 x86_push_imm_template (code);
3578
3579                 /* save all caller saved regs */
3580                 x86_push_reg (code, X86_EBP);
3581                 x86_push_reg (code, X86_ESI);
3582                 x86_push_reg (code, X86_EDI);
3583                 x86_push_reg (code, X86_EBX);
3584
3585                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3586                         /*
3587                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3588                          * through the mono_lmf_addr TLS variable.
3589                          */
3590                         /* %eax = previous_lmf */
3591                         x86_prefix (code, X86_GS_PREFIX);
3592                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3593                         /* skip method_info + lmf */
3594                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3595                         /* push previous_lmf */
3596                         x86_push_reg (code, X86_EAX);
3597                         /* new lmf = ESP */
3598                         x86_prefix (code, X86_GS_PREFIX);
3599                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
3600                 } else {
3601                         /* get the address of lmf for the current thread */
3602                         /* 
3603                          * This is performance critical so we try to use some tricks to make
3604                          * it fast.
3605                          */                                                                        
3606
3607                         if (lmf_addr_tls_offset != -1) {
3608                                 /* Load lmf quicky using the GS register */
3609                                 code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
3610 #ifdef PLATFORM_WIN32
3611                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3612                                 /* FIXME: Add a separate key for LMF to avoid this */
3613                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3614 #endif
3615                         } else {
3616                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3617                         }
3618
3619                         /* Skip method info */
3620                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3621
3622                         /* push lmf */
3623                         x86_push_reg (code, X86_EAX); 
3624                         /* push *lfm (previous_lmf) */
3625                         x86_push_membase (code, X86_EAX, 0);
3626                         /* *(lmf) = ESP */
3627                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3628                 }
3629         } else {
3630
3631                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3632                         x86_push_reg (code, X86_EBX);
3633                         pos += 4;
3634                 }
3635
3636                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3637                         x86_push_reg (code, X86_EDI);
3638                         pos += 4;
3639                 }
3640
3641                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3642                         x86_push_reg (code, X86_ESI);
3643                         pos += 4;
3644                 }
3645         }
3646
3647         alloc_size -= pos;
3648
3649 #if __APPLE__
3650         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3651         {
3652                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3653                 if (tot & 4) {
3654                         tot += 4;
3655                         alloc_size += 4;
3656                 }
3657                 if (tot & 8) {
3658                         alloc_size += 8;
3659                 }
3660         }
3661 #endif
3662
3663         if (alloc_size) {
3664                 /* See mono_emit_stack_alloc */
3665 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3666                 guint32 remaining_size = alloc_size;
3667                 while (remaining_size >= 0x1000) {
3668                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3669                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3670                         remaining_size -= 0x1000;
3671                 }
3672                 if (remaining_size)
3673                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3674 #else
3675                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3676 #endif
3677         }
3678
3679 #if __APPLE_
3680         /* check the stack is aligned */
3681         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3682         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3683         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3684         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3685         x86_breakpoint (code);
3686 #endif
3687
3688         /* compute max_offset in order to use short forward jumps */
3689         max_offset = 0;
3690         if (cfg->opt & MONO_OPT_BRANCH) {
3691                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3692                         MonoInst *ins = bb->code;
3693                         bb->max_offset = max_offset;
3694
3695                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3696                                 max_offset += 6;
3697                         /* max alignment for loops */
3698                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3699                                 max_offset += LOOP_ALIGNMENT;
3700
3701                         while (ins) {
3702                                 if (ins->opcode == OP_LABEL)
3703                                         ins->inst_c1 = max_offset;
3704                                 
3705                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3706                                 ins = ins->next;
3707                         }
3708                 }
3709         }
3710
3711         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3712                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3713
3714         /* load arguments allocated to register from the stack */
3715         sig = mono_method_signature (method);
3716         pos = 0;
3717
3718         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3719                 inst = cfg->args [pos];
3720                 if (inst->opcode == OP_REGVAR) {
3721                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3722                         if (cfg->verbose_level > 2)
3723                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3724                 }
3725                 pos++;
3726         }
3727
3728         cfg->code_len = code - cfg->native_code;
3729
3730         return code;
3731 }
3732
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the native epilog for CFG->method: optionally instrument for
 * tracing, unlink the LMF or pop the saved callee-saved registers, load a
 * valuetype return into the registers/fp-stack slots dictated by the calling
 * convention, then emit leave + ret (with an immediate for stdcall / vtype
 * returns that require the callee to pop arguments).
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;
	
	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	/* Grow the code buffer until the worst-case epilog fits */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with OP_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;
		/* The LMF was built at the top of the frame by the prolog */
		gint32 lmf_offset = -sizeof (MonoLMF);

		if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
			/*
			 * Optimized version which uses the mono_lmf TLS variable instead of indirection
			 * through the mono_lmf_addr TLS variable.
			 */
			/* reg = previous_lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* lmf = previous_lmf */
			x86_prefix (code, X86_GS_PREFIX);
			x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
		} else {
			/* Find a spare register: EDX may hold the high word of an
			 * I8/U8 return value, so use EDI in that case */
			switch (sig->ret->type) {
			case MONO_TYPE_I8:
			case MONO_TYPE_U8:
				prev_lmf_reg = X86_EDI;
				cfg->used_int_regs |= (1 << X86_EDI);
				break;
			default:
				prev_lmf_reg = X86_EDX;
				break;
			}

			/* reg = previous_lmf */
			x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* ecx = lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);

			/* *(lmf) = previous_lmf */
			x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
		}

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* Compute the (negative) offset from EBP where the prolog
		 * pushed the callee-saved registers */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		/* Pop in reverse push order (prolog pushed EBX, EDI, ESI) */
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (cfg->mempool, sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	if (CALLCONV_IS_STDCALL (sig)) {
		/* stdcall: callee pops all arguments */
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		/* Struct returned via hidden pointer arg: callee pops that pointer */
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3882
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit out-of-line throw sequences for all MONO_PATCH_INFO_EXC patch
 * entries, appending them after the method body. Each sequence pushes the
 * offset of the throwing IP and the exception class token, then calls
 * mono_arch_throw_corlib_exception. Sequences for the same exception class
 * are emitted once and shared: later sites push their own IP offset and
 * jump into the existing sequence.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	/* Cache of up to 16 already-emitted throw sequences, keyed by class */
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Redirect the in-body branch to the sequence emitted below */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse it: push this site's IP offset and jump in */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				size = 5 + 5;

				/* The IP offset operand isn't known until after the call
				 * is emitted, so emit a placeholder push now (short or
				 * long encoding) and rewrite it at 'buf' afterwards */
				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				x86_call_code (code, 0);
				/* Rewrite the placeholder with the real IP offset; pad any
				 * leftover bytes of the longer encoding with nops */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3988
/*
 * mono_arch_flush_icache:
 *
 *   Flush the instruction cache for the range [CODE, CODE + SIZE). On x86
 * the hardware keeps the instruction cache coherent with stores to memory,
 * so no explicit flush is required after generating code.
 */
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* not needed */
}
3994
/*
 * mono_arch_flush_register_windows:
 *
 *   No-op on x86: register windows are a feature of architectures such as
 * SPARC/IA-64 and do not exist here.
 */
void
mono_arch_flush_register_windows (void)
{
}
3999
/*
 * Support for fast access to the thread-local lmf structure using the GS
 * segment register on NPTL + kernel 2.6.x.
 */

/* TRUE once the TLS offsets below have been cached (set on the non-win32 path only) */
static gboolean tls_offset_inited = FALSE;

/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Cache the TLS keys/offsets needed to emit inline TLS accesses
 * (appdomain, lmf, lmf_addr, thread). Setting MONO_NO_TLS in the
 * environment disables all of the inline TLS fast paths.
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
			/* 
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();
			thread_tls_offset = mono_thread_get_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
			if (thread_tls_offset >= 64)
				thread_tls_offset = -1;
#else
#if MONO_XEN_OPT
			/* /proc/xen existing indicates we run under Xen; presumably
			 * segment-prefixed TLS accesses are costly there — confirm
			 * against the users of optimize_for_xen. */
			optimize_for_xen = access ("/proc/xen", F_OK) == 0;
#endif
			/* NOTE(review): tls_offset_inited is deliberately only set here;
			 * on win32 the keys are re-read on every call (see the comment
			 * in the win32 branch above). */
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
			thread_tls_offset = mono_thread_get_tls_offset ();
#endif
		}
	}		
}
4041
/*
 * mono_arch_free_jit_tls_data:
 *
 *   Free the arch specific parts of TLS. Nothing to release on x86:
 * mono_arch_setup_jit_tls_data () only caches integer keys/offsets in
 * file-scope variables.
 */
void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
}
4046
4047 void
4048 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4049 {
4050         MonoCallInst *call = (MonoCallInst*)inst;
4051         CallInfo *cinfo = get_call_info (cfg->mempool, inst->signature, FALSE);
4052
4053         /* add the this argument */
4054         if (this_reg != -1) {
4055                 if (cinfo->args [0].storage == ArgInIReg) {
4056                         MonoInst *this;
4057                         MONO_INST_NEW (cfg, this, OP_MOVE);
4058                         this->type = this_type;
4059                         this->sreg1 = this_reg;
4060                         this->dreg = mono_regstate_next_int (cfg->rs);
4061                         mono_bblock_add_inst (cfg->cbb, this);
4062
4063                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
4064                 }
4065                 else {
4066                         MonoInst *this;
4067                         MONO_INST_NEW (cfg, this, OP_OUTARG);
4068                         this->type = this_type;
4069                         this->sreg1 = this_reg;
4070                         mono_bblock_add_inst (cfg->cbb, this);
4071                 }
4072         }
4073
4074         if (vt_reg != -1) {
4075                 MonoInst *vtarg;
4076
4077                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4078                         /*
4079                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4080                          * the stack. Save the address here, so the call instruction can
4081                          * access it.
4082                          */
4083                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4084                         vtarg->inst_destbasereg = X86_ESP;
4085                         vtarg->inst_offset = inst->stack_usage;
4086                         vtarg->sreg1 = vt_reg;
4087                         mono_bblock_add_inst (cfg->cbb, vtarg);
4088                 }
4089                 else if (cinfo->ret.storage == ArgInIReg) {
4090                         /* The return address is passed in a register */
4091                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
4092                         vtarg->sreg1 = vt_reg;
4093                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
4094                         mono_bblock_add_inst (cfg->cbb, vtarg);
4095
4096                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
4097                 } else {
4098                         MonoInst *vtarg;
4099                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4100                         vtarg->type = STACK_MP;
4101                         vtarg->sreg1 = vt_reg;
4102                         mono_bblock_add_inst (cfg->cbb, vtarg);
4103                 }
4104         }
4105 }
4106
4107 MonoInst*
4108 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4109 {
4110         MonoInst *ins = NULL;
4111
4112         if (cmethod->klass == mono_defaults.math_class) {
4113                 if (strcmp (cmethod->name, "Sin") == 0) {
4114                         MONO_INST_NEW (cfg, ins, OP_SIN);
4115                         ins->inst_i0 = args [0];
4116                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4117                         MONO_INST_NEW (cfg, ins, OP_COS);
4118                         ins->inst_i0 = args [0];
4119                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4120                         MONO_INST_NEW (cfg, ins, OP_TAN);
4121                         ins->inst_i0 = args [0];
4122                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4123                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4124                         ins->inst_i0 = args [0];
4125                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4126                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4127                         ins->inst_i0 = args [0];
4128                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4129                         MONO_INST_NEW (cfg, ins, OP_ABS);
4130                         ins->inst_i0 = args [0];
4131                 }
4132 #if 0
4133                 /* OP_FREM is not IEEE compatible */
4134                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4135                         MONO_INST_NEW (cfg, ins, OP_FREM);
4136                         ins->inst_i0 = args [0];
4137                         ins->inst_i1 = args [1];
4138                 }
4139 #endif
4140         } else if (cmethod->klass == mono_defaults.thread_class &&
4141                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
4142                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
4143         } else if(cmethod->klass->image == mono_defaults.corlib &&
4144                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4145                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4146
4147                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4148                         MonoInst *ins_iconst;
4149
4150                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4151                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4152                         ins_iconst->inst_c0 = 1;
4153
4154                         ins->inst_i0 = args [0];
4155                         ins->inst_i1 = ins_iconst;
4156                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4157                         MonoInst *ins_iconst;
4158
4159                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4160                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4161                         ins_iconst->inst_c0 = -1;
4162
4163                         ins->inst_i0 = args [0];
4164                         ins->inst_i1 = ins_iconst;
4165                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4166                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4167
4168                         ins->inst_i0 = args [0];
4169                         ins->inst_i1 = args [1];
4170                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4171                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4172
4173                         ins->inst_i0 = args [0];
4174                         ins->inst_i1 = args [1];
4175                 }
4176         }
4177
4178         return ins;
4179 }
4180
4181
4182 gboolean
4183 mono_arch_print_tree (MonoInst *tree, int arity)
4184 {
4185         return 0;
4186 }
4187
4188 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4189 {
4190         MonoInst* ins;
4191         
4192         if (appdomain_tls_offset == -1)
4193                 return NULL;
4194
4195         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4196         ins->inst_offset = appdomain_tls_offset;
4197         return ins;
4198 }
4199
4200 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4201 {
4202         MonoInst* ins;
4203
4204         if (thread_tls_offset == -1)
4205                 return NULL;
4206
4207         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4208         ins->inst_offset = thread_tls_offset;
4209         return ins;
4210 }
4211
4212 guint32
4213 mono_arch_get_patch_offset (guint8 *code)
4214 {
4215         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4216                 return 2;
4217         else if ((code [0] == 0xba))
4218                 return 1;
4219         else if ((code [0] == 0x68))
4220                 /* push IMM */
4221                 return 1;
4222         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4223                 /* push <OFFSET>(<REG>) */
4224                 return 2;
4225         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4226                 /* call *<OFFSET>(<REG>) */
4227                 return 2;
4228         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4229                 /* fldl <ADDR> */
4230                 return 2;
4231         else if ((code [0] == 0x58) && (code [1] == 0x05))
4232                 /* pop %eax; add <OFFSET>, %eax */
4233                 return 2;
4234         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4235                 /* pop <REG>; add <OFFSET>, <REG> */
4236                 return 3;
4237         else {
4238                 g_assert_not_reached ();
4239                 return -1;
4240         }
4241 }
4242
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   Disassemble backwards from CODE (the return address of an indirect
 * call) to identify the memory slot the call went through, and compute its
 * address from the saved register state REGS. Returns NULL when the call
 * was direct (no slot to patch). REGS is indexed by x86 register number.
 */
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;

	/* 
	 * A given byte sequence can match more than case here, so we have to be
	 * really careful about the ordering of the cases. Longer sequences
	 * come first.
	 */
	if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
		/*
		 * This is an interface call
		 * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
		 * ff 10                   call   *(%eax)
		 */
		reg = x86_modrm_rm (code [5]);
		disp = 0;
	} else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *imm8(<REG>): mod=1, reg-field=2, sign-extended 8 bit displacement */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *imm32(<REG>): mod=2, reg-field=2, 32 bit displacement */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* call rel32: direct call, no vcall slot */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	/* Slot address = register contents at call time + encoded displacement */
	return (gpointer*)(((gint32)(regs [reg])) + disp);
}
4295
4296 gpointer
4297 mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code)
4298 {
4299         guint32 esp = regs [X86_ESP];
4300         CallInfo *cinfo;
4301         gpointer res;
4302
4303         cinfo = get_call_info (NULL, sig, FALSE);
4304
4305         /*
4306          * The stack looks like:
4307          * <other args>
4308          * <this=delegate>
4309          * <possible vtype return address>
4310          * <return addr>
4311          * <4 pointers pushed by mono_arch_create_trampoline_code ()>
4312          */
4313         res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]);
4314         g_free (cinfo);
4315         return res;
4316 }
4317
/*
 * mono_arch_get_delegate_invoke_impl:
 *
 *   Generate a small trampoline implementing Delegate.Invoke for SIG, or
 * return NULL when no specialized implementation is available for this
 * signature (e.g. valuetype returns, or open delegates with parameters).
 */
gpointer
mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
{
	guint8 *code, *start;
	MonoDomain *domain = mono_domain_get ();

	/* FIXME: Support more cases */
	if (MONO_TYPE_ISSTRUCT (sig->ret))
		return NULL;

	/*
	 * The stack contains:
	 * <delegate>
	 * <return addr>
	 */

	if (has_target) {
		mono_domain_lock (domain);
		start = code = mono_code_manager_reserve (domain->code_mp, 64);
		mono_domain_unlock (domain);

		/* Replace the this argument with the target */
		x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
		x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
		x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
		/* Tail-jump into the delegate's method_ptr; the callee reuses this frame */
		x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));

		g_assert ((code - start) < 64);
	} else {
		if (sig->param_count == 0) {
			mono_domain_lock (domain);
			start = code = mono_code_manager_reserve (domain->code_mp, 32 + (sig->param_count * 8));
			mono_domain_unlock (domain);
		
			/* Load the delegate (first stack arg) and jump to its method_ptr */
			x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
			x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
		} else {
			/* 
			 * The code below does not work in the presence of exceptions, since it 
			 * creates a new frame.
			 */
			start = NULL;
#if 0
			for (i = 0; i < sig->param_count; ++i)
				if (!mono_is_regsize_var (sig->params [i]))
					return NULL;

			mono_domain_lock (domain);
			start = code = mono_code_manager_reserve (domain->code_mp, 32 + (sig->param_count * 8));
			mono_domain_unlock (domain);

			/* Load this == delegate */
			x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);

			/* Push arguments in opposite order, taking changes in ESP into account */
			for (i = 0; i < sig->param_count; ++i)
				x86_push_membase (code, X86_ESP, 4 + (sig->param_count * 4));

			/* Call the delegate */
			x86_call_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
			if (sig->param_count > 0)
				x86_alu_reg_imm (code, X86_ADD, X86_ESP, sig->param_count * 4);
			x86_ret (code);
#endif
		}
	}

	return start;
}