Better error message.
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/debug-helpers.h>
20 #include <mono/metadata/threads.h>
21 #include <mono/metadata/profiler-private.h>
22 #include <mono/utils/mono-math.h>
23
24 #include "trace.h"
25 #include "mini-x86.h"
26 #include "inssel.h"
27 #include "cpu-x86.h"
28
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
/* When TRUE, avoid code sequences which are slow or fault under Xen
 * paravirtualization; compile-time 0 when Xen support is disabled. */
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#ifdef PLATFORM_WIN32
/* Compile-time flag used to select win32-specific code paths */
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif
46
/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;

/* Round VAL up to the next multiple of ALIGN (ALIGN must be a power of two) */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

/* Offset of the first incoming argument from EBP: saved EBP + return address */
#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

/* Marker for code paths which are not implemented yet */
#define NOT_IMPLEMENTED g_assert_not_reached ()
65 const char*
66 mono_arch_regname (int reg) {
67         switch (reg) {
68         case X86_EAX: return "%eax";
69         case X86_EBX: return "%ebx";
70         case X86_ECX: return "%ecx";
71         case X86_EDX: return "%edx";
72         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
73         case X86_EDI: return "%edi";
74         case X86_ESI: return "%esi";
75         }
76         return "unknown";
77 }
78
/*
 * mono_arch_fregname:
 *
 *   x86 uses the x87 fp stack, so individual float registers
 * have no printable names; always returns "unknown".
 */
const char*
mono_arch_fregname (int reg) {
        return "unknown";
}
83
/* Where an argument or return value is located */
typedef enum {
        ArgInIReg,              /* in an integer register */
        ArgInFloatSSEReg,       /* in an SSE reg as float (never assigned: FLOAT_PARAM_REGS is 0) */
        ArgInDoubleSSEReg,      /* in an SSE reg as double (never assigned: FLOAT_PARAM_REGS is 0) */
        ArgOnStack,             /* on the stack */
        ArgValuetypeInReg,      /* small valuetype split across up to two locations (see pair_storage) */
        ArgOnFloatFpStack,      /* on top of the x87 fp stack, single precision */
        ArgOnDoubleFpStack,     /* on top of the x87 fp stack, double precision */
        ArgNone                 /* no value (void return / unused pair slot) */
} ArgStorage;
94
/* Location of a single argument or return value */
typedef struct {
        gint16 offset;          /* stack offset from the start of the argument area */
        gint8  reg;             /* register number, when storage is a register kind */
        ArgStorage storage;     /* where the value lives */

        /* Only if storage == ArgValuetypeInReg */
        ArgStorage pair_storage [2];    /* location of each half of the valuetype */
        gint8 pair_regs [2];            /* register for each half, when in registers */
} ArgInfo;
104
/* Full calling-convention layout for one signature; built by get_call_info () */
typedef struct {
        int nargs;                      /* number of arguments (including this) */
        guint32 stack_usage;            /* total stack space used by arguments */
        guint32 reg_usage;              /* integer registers consumed */
        guint32 freg_usage;             /* float registers consumed */
        gboolean need_stack_align;      /* TRUE when extra padding is needed for alignment */
        guint32 stack_align_amount;     /* bytes of padding added when need_stack_align */
        ArgInfo ret;                    /* location of the return value */
        ArgInfo sig_cookie;             /* location of the vararg signature cookie */
        ArgInfo args [1];               /* flexible array: one entry per argument */
} CallInfo;
116
/* No arguments are passed in registers on x86: everything goes on the stack */
#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
/* On these platforms small structs are returned in EAX (and EDX for 8 bytes) */
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
127
128 static void inline
129 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
130 {
131     ainfo->offset = *stack_size;
132
133     if (*gr >= PARAM_REGS) {
134                 ainfo->storage = ArgOnStack;
135                 (*stack_size) += sizeof (gpointer);
136     }
137     else {
138                 ainfo->storage = ArgInIReg;
139                 ainfo->reg = param_regs [*gr];
140                 (*gr) ++;
141     }
142 }
143
144 static void inline
145 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
146 {
147         ainfo->offset = *stack_size;
148
149         g_assert (PARAM_REGS == 0);
150         
151         ainfo->storage = ArgOnStack;
152         (*stack_size) += sizeof (gpointer) * 2;
153 }
154
155 static void inline
156 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
157 {
158     ainfo->offset = *stack_size;
159
160     if (*gr >= FLOAT_PARAM_REGS) {
161                 ainfo->storage = ArgOnStack;
162                 (*stack_size) += is_double ? 8 : 4;
163     }
164     else {
165                 /* A double register */
166                 if (is_double)
167                         ainfo->storage = ArgInDoubleSSEReg;
168                 else
169                         ainfo->storage = ArgInFloatSSEReg;
170                 ainfo->reg = *gr;
171                 (*gr) += 1;
172     }
173 }
174
175
/*
 * add_valuetype:
 *
 *   Compute the location of a valuetype argument or return value.
 * Small pinvoke return values may use registers or the fp stack on some
 * platforms (SMALL_STRUCTS_IN_REGS); everything else goes on the stack.
 */
static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
               gboolean is_return,
               guint32 *gr, guint32 *fr, guint32 *stack_size)
{
        guint32 size;
        MonoClass *klass;

        klass = mono_class_from_mono_type (type);
        /* Native and managed stack sizes can differ for pinvoke signatures */
        if (sig->pinvoke) 
                size = mono_type_native_stack_size (&klass->byval_arg, NULL);
        else 
                size = mini_type_stack_size (gsctx, &klass->byval_arg, NULL);

#ifdef SMALL_STRUCTS_IN_REGS
        if (sig->pinvoke && is_return) {
                MonoMarshalType *info;

                /*
                 * the exact rules are not very well documented, the code below seems to work with the 
                 * code generated by gcc 3.3.3 -mno-cygwin.
                 */
                info = mono_marshal_load_type_info (klass);
                g_assert (info);

                ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

                /* Special case structs with only a float member */
                if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
                        ainfo->storage = ArgValuetypeInReg;
                        ainfo->pair_storage [0] = ArgOnDoubleFpStack;
                        return;
                }
                if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
                        ainfo->storage = ArgValuetypeInReg;
                        ainfo->pair_storage [0] = ArgOnFloatFpStack;
                        return;
                }               
                /* Structs of size 1, 2, 4 or 8 are returned in EAX (and EDX for 8 bytes) */
                if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
                        ainfo->storage = ArgValuetypeInReg;
                        ainfo->pair_storage [0] = ArgInIReg;
                        ainfo->pair_regs [0] = return_regs [0];
                        if (info->native_size > 4) {
                                ainfo->pair_storage [1] = ArgInIReg;
                                ainfo->pair_regs [1] = return_regs [1];
                        }
                        return;
                }
        }
#endif

        /* Default: pass on the stack, rounded up to a whole number of words */
        ainfo->offset = *stack_size;
        ainfo->storage = ArgOnStack;
        *stack_size += ALIGN_TO (size, sizeof (gpointer));
}
231
232 /*
233  * get_call_info:
234  *
235  *  Obtain information about a call according to the calling convention.
236  * For x86 ELF, see the "System V Application Binary Interface Intel386 
 * Architecture Processor Supplement, Fourth Edition" document for more
238  * information.
239  * For x86 win32, see ???.
240  */
static CallInfo*
get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
        guint32 i, gr, fr;
        MonoType *ret_type;
        int n = sig->hasthis + sig->param_count;
        guint32 stack_size = 0;
        CallInfo *cinfo;
        MonoGenericSharingContext *gsctx = cfg ? cfg->generic_sharing_context : NULL;

        /* CallInfo ends with a flexible args array: allocate room for all N args.
         * When MP is NULL the caller owns the result and must g_free () it. */
        if (mp)
                cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
        else
                cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

        gr = 0;
        fr = 0;

        /* return value */
        {
                ret_type = mono_type_get_underlying_type (sig->ret);
                ret_type = mini_get_basic_type_from_generic (gsctx, ret_type);
                switch (ret_type->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                case MONO_TYPE_STRING:
                        /* Scalar and reference returns come back in EAX */
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = X86_EAX;
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        /* 64 bit returns: reg records the low word register */
                        cinfo->ret.storage = ArgInIReg;
                        cinfo->ret.reg = X86_EAX;
                        break;
                case MONO_TYPE_R4:
                        cinfo->ret.storage = ArgOnFloatFpStack;
                        break;
                case MONO_TYPE_R8:
                        cinfo->ret.storage = ArgOnDoubleFpStack;
                        break;
                case MONO_TYPE_GENERICINST:
                        if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
                                /* Reference-type instantiation: same as an object reference */
                                cinfo->ret.storage = ArgInIReg;
                                cinfo->ret.reg = X86_EAX;
                                break;
                        }
                        /* Fall through */
                case MONO_TYPE_VALUETYPE: {
                        guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

                        add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
                        if (cinfo->ret.storage == ArgOnStack)
                                /* The caller passes the address where the value is stored */
                                add_general (&gr, &stack_size, &cinfo->ret);
                        break;
                }
                case MONO_TYPE_TYPEDBYREF:
                        /* Same as a valuetype with size 24 */
                        add_general (&gr, &stack_size, &cinfo->ret);
                        ;
                        break;
                case MONO_TYPE_VOID:
                        cinfo->ret.storage = ArgNone;
                        break;
                default:
                        g_error ("Can't handle as return value 0x%x", sig->ret->type);
                }
        }

        /* this */
        if (sig->hasthis)
                add_general (&gr, &stack_size, cinfo->args + 0);

        /* Vararg call with no fixed arguments: the cookie is the first thing pushed */
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;
                
                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

        for (i = 0; i < sig->param_count; ++i) {
                ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
                MonoType *ptype;

                if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
                        /* We always pass the sig cookie on the stack for simplicity */
                        /* 
                         * Prevent implicit arguments + the sig cookie from being passed 
                         * in registers.
                         */
                        gr = PARAM_REGS;
                        fr = FLOAT_PARAM_REGS;

                        /* Emit the signature cookie just before the implicit arguments */
                        add_general (&gr, &stack_size, &cinfo->sig_cookie);
                }

                if (sig->params [i]->byref) {
                        /* Byref arguments are passed as a single pointer */
                        add_general (&gr, &stack_size, ainfo);
                        continue;
                }
                ptype = mono_type_get_underlying_type (sig->params [i]);
                ptype = mini_get_basic_type_from_generic (gsctx, ptype);
                switch (ptype->type) {
                case MONO_TYPE_BOOLEAN:
                case MONO_TYPE_I1:
                case MONO_TYPE_U1:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I2:
                case MONO_TYPE_U2:
                case MONO_TYPE_CHAR:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I4:
                case MONO_TYPE_U4:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_I:
                case MONO_TYPE_U:
                case MONO_TYPE_PTR:
                case MONO_TYPE_FNPTR:
                case MONO_TYPE_CLASS:
                case MONO_TYPE_OBJECT:
                case MONO_TYPE_STRING:
                case MONO_TYPE_SZARRAY:
                case MONO_TYPE_ARRAY:
                        add_general (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_GENERICINST:
                        if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
                                /* Reference-type instantiation: passed as a pointer */
                                add_general (&gr, &stack_size, ainfo);
                                break;
                        }
                        /* Fall through */
                case MONO_TYPE_VALUETYPE:
                        add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
                        break;
                case MONO_TYPE_TYPEDBYREF:
                        stack_size += sizeof (MonoTypedRef);
                        ainfo->storage = ArgOnStack;
                        break;
                case MONO_TYPE_U8:
                case MONO_TYPE_I8:
                        add_general_pair (&gr, &stack_size, ainfo);
                        break;
                case MONO_TYPE_R4:
                        add_float (&fr, &stack_size, ainfo, FALSE);
                        break;
                case MONO_TYPE_R8:
                        add_float (&fr, &stack_size, ainfo, TRUE);
                        break;
                default:
                        g_error ("unexpected type 0x%x", ptype->type);
                        g_assert_not_reached ();
                }
        }

        /* Vararg call where every argument is fixed: the cookie goes last */
        if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
                gr = PARAM_REGS;
                fr = FLOAT_PARAM_REGS;
                
                /* Emit the signature cookie just before the implicit arguments */
                add_general (&gr, &stack_size, &cinfo->sig_cookie);
        }

#if defined(__APPLE__)
        /* Darwin requires 16 byte stack alignment at call sites */
        if ((stack_size % 16) != 0) { 
                cinfo->need_stack_align = TRUE;
                stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
        }
#endif

        cinfo->stack_usage = stack_size;
        cinfo->reg_usage = gr;
        cinfo->freg_usage = fr;
        return cinfo;
}
434
435 /*
436  * mono_arch_get_argument_info:
437  * @csig:  a method signature
438  * @param_count: the number of parameters to consider
439  * @arg_info: an array to store the result infos
440  *
441  * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries. 
443  *
444  * Returns the size of the activation frame.
445  */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
        int k, frame_size = 0;
        int size, pad;
        guint32 align;
        int offset = 8;         /* skip saved EBP and the return address */
        CallInfo *cinfo;

        /* mp == NULL: cinfo is heap allocated and freed below */
        cinfo = get_call_info (NULL, NULL, csig, FALSE);

        /* A valuetype returned on the stack adds a hidden pointer argument */
        if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
                frame_size += sizeof (gpointer);
                offset += 4;
        }

        arg_info [0].offset = offset;

        if (csig->hasthis) {
                frame_size += sizeof (gpointer);
                offset += 4;
        }

        /* arg_info [0].size covers the implicit arguments (return pointer + this) */
        arg_info [0].size = frame_size;

        for (k = 0; k < param_count; k++) {
                
                if (csig->pinvoke)
                        size = mono_type_native_stack_size (csig->params [k], &align);
                else {
                        int ialign;
                        size = mini_type_stack_size (NULL, csig->params [k], &ialign);
                        align = ialign;
                }

                /* ignore alignment for now */
                align = 1;

                /* pad = bytes needed to bring frame_size up to `align` */
                frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
                arg_info [k].pad = pad;
                frame_size += size;
                arg_info [k + 1].pad = 0;
                arg_info [k + 1].size = size;
                offset += pad;
                arg_info [k + 1].offset = offset;
                offset += size;
        }

        /* Round the total frame up to the platform frame alignment */
        align = MONO_ARCH_FRAME_ALIGNMENT;
        frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
        arg_info [k].pad = pad;

        g_free (cinfo);

        return frame_size;
}
502
/*
 * Pre-assembled machine code implementing:
 *   void cpuid_impl (int id, int *p_eax, int *p_ebx, int *p_ecx, int *p_edx)
 * It is copied into executable memory at runtime by cpuid () below, which
 * works around WinXP DEP restrictions on executing data pages.
 */
static const guchar cpuid_impl [] = {
        0x55,                           /* push   %ebp */
        0x89, 0xe5,                     /* mov    %esp,%ebp */
        0x53,                           /* push   %ebx */
        0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
        0x0f, 0xa2,                     /* cpuid   */
        0x50,                           /* push   %eax */
        0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
        0x89, 0x18,                     /* mov    %ebx,(%eax) */
        0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
        0x89, 0x08,                     /* mov    %ecx,(%eax) */
        0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
        0x89, 0x10,                     /* mov    %edx,(%eax) */
        0x58,                           /* pop    %eax */
        0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
        0x89, 0x02,                     /* mov    %eax,(%edx) */
        0x5b,                           /* pop    %ebx */
        0xc9,                           /* leave   */
        0xc3,                           /* ret     */
};
523
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);

/*
 * cpuid:
 *
 *   Execute the cpuid instruction with function ID and store the resulting
 * register values through the out parameters.
 * Returns 1 on success, 0 if the CPU does not support cpuid.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
        int have_cpuid = 0;
#ifndef _MSC_VER
        /* cpuid is supported iff bit 21 (ID) of EFLAGS can be toggled */
        __asm__  __volatile__ (
                "pushfl\n"
                "popl %%eax\n"
                "movl %%eax, %%edx\n"
                "xorl $0x200000, %%eax\n"
                "pushl %%eax\n"
                "popfl\n"
                "pushfl\n"
                "popl %%eax\n"
                "xorl %%edx, %%eax\n"
                "andl $0x200000, %%eax\n"
                "movl %%eax, %0"
                : "=r" (have_cpuid)
                :
                : "%eax", "%edx"
        );
#else
        /* Same EFLAGS ID-bit probe, MSVC inline-assembly syntax */
        __asm {
                pushfd
                pop eax
                mov edx, eax
                xor eax, 0x200000
                push eax
                popfd
                pushfd
                pop eax
                xor eax, edx
                and eax, 0x200000
                mov have_cpuid, eax
        }
#endif
        if (have_cpuid) {
                /* Have to use the code manager to get around WinXP DEP */
                static CpuidFunc func = NULL;
                void *ptr;
                if (!func) {
                        /* Copy cpuid_impl into executable memory, once */
                        ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
                        memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
                        func = (CpuidFunc)ptr;
                }
                func (id, p_eax, p_ebx, p_ecx, p_edx);

                /*
                 * We use this approach because of issues with gcc and pic code, see:
                 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
                __asm__ __volatile__ ("cpuid"
                        : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
                        : "a" (id));
                */
                return 1;
        }
        return 0;
}
584
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
        /* spec compliance requires running with double precision */
#ifndef _MSC_VER
        guint16 fpcw;

        /* Set the x87 control word precision field to double (53 bit mantissa) */
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
        fpcw &= ~X86_FPCW_PRECC_MASK;
        fpcw |= X86_FPCW_PREC_DOUBLE;
        __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
        __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
        _control87 (_PC_53, MCW_PC);
#endif
}
604
/*
 * Initialize architecture specific code (called once at startup;
 * sets up the mutex used by mono_mini_arch_lock ()).
 */
void
mono_arch_init (void)
{
        InitializeCriticalSection (&mini_arch_mutex);
}
613
/*
 * Cleanup architecture specific code (releases the mutex
 * created by mono_arch_init ()).
 */
void
mono_arch_cleanup (void)
{
        DeleteCriticalSection (&mini_arch_mutex);
}
622
623 /*
624  * This function returns the optimizations supported on this cpu.
625  */
626 guint32
627 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
628 {
629         int eax, ebx, ecx, edx;
630         guint32 opts = 0;
631         
632         *exclude_mask = 0;
633         /* Feature Flags function, flags returned in EDX. */
634         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
635                 if (edx & (1 << 15)) {
636                         opts |= MONO_OPT_CMOV;
637                         if (edx & 1)
638                                 opts |= MONO_OPT_FCMOV;
639                         else
640                                 *exclude_mask |= MONO_OPT_FCMOV;
641                 } else
642                         *exclude_mask |= MONO_OPT_CMOV;
643                 if (edx & (1 << 26))
644                         opts |= MONO_OPT_SSE2;
645                 else
646                         *exclude_mask |= MONO_OPT_SSE2;
647         }
648         return opts;
649 }
650
/*
 * Determine whether the trap whose context is in SIGCTX is caused by
 * integer overflow (as opposed to division by zero).
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
        MonoContext ctx;
        guint8* ip;

        mono_arch_sigctx_to_monoctx (sigctx, &ctx);

        ip = (guint8*)ctx.eip;

        /* Opcode 0xf7 with mod == 3 and reg == 7 is 'idiv <reg>' */
        if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
                gint32 reg;

                /* idiv REG: read the divisor value out of the faulting context */
                switch (x86_modrm_rm (ip [1])) {
                case X86_EAX:
                        reg = ctx.eax;
                        break;
                case X86_ECX:
                        reg = ctx.ecx;
                        break;
                case X86_EDX:
                        reg = ctx.edx;
                        break;
                case X86_EBX:
                        reg = ctx.ebx;
                        break;
                case X86_ESI:
                        reg = ctx.esi;
                        break;
                case X86_EDI:
                        reg = ctx.edi;
                        break;
                default:
                        g_assert_not_reached ();
                        reg = -1;
                }

                /* A divisor of -1 means the fault was an overflow (INT_MIN / -1),
                 * not a division by zero */
                if (reg == -1)
                        return TRUE;
        }
                        
        return FALSE;
}
699
/*
 * mono_arch_get_allocatable_int_vars:
 *
 *   Return the list of variables eligible for integer register allocation,
 * sorted by mono_varlist_sort ().
 */
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
        GList *vars = NULL;
        int i;

        for (i = 0; i < cfg->num_varinfo; i++) {
                MonoInst *ins = cfg->varinfo [i];
                MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

                /* unused vars */
                if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
                        continue;

                /* Skip dead, volatile or indirectly referenced variables, and
                 * anything which is not a local or an argument */
                if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
                    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
                        continue;

                /* we dont allocate I1 to registers because there is no simply way to sign extend 
                 * 8bit quantities in caller saved registers on x86 */
                if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
                        g_assert (MONO_VARINFO (cfg, i)->reg == -1);
                        g_assert (i == vmv->idx);
                        vars = g_list_prepend (vars, vmv);
                }
        }

        vars = mono_varlist_sort (cfg, vars, 0);

        return vars;
}
731
732 GList *
733 mono_arch_get_global_int_regs (MonoCompile *cfg)
734 {
735         GList *regs = NULL;
736
737         /* we can use 3 registers for global allocation */
738         regs = g_list_prepend (regs, (gpointer)X86_EBX);
739         regs = g_list_prepend (regs, (gpointer)X86_ESI);
740         regs = g_list_prepend (regs, (gpointer)X86_EDI);
741
742         return regs;
743 }
744
745 /*
746  * mono_arch_regalloc_cost:
747  *
748  *  Return the cost, in number of memory references, of the action of 
749  * allocating the variable VMV into a register during global register
750  * allocation.
751  */
752 guint32
753 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
754 {
755         MonoInst *ins = cfg->varinfo [vmv->idx];
756
757         if (cfg->method->save_lmf)
758                 /* The register is already saved */
759                 return (ins->opcode == OP_ARG) ? 1 : 0;
760         else
761                 /* push+pop+possible load if it is an argument */
762                 return (ins->opcode == OP_ARG) ? 3 : 2;
763 }
764  
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
        MonoMethodSignature *sig;
        MonoMethodHeader *header;
        MonoInst *inst;
        guint32 locals_stack_size, locals_stack_align;
        int i, offset;
        gint32 *offsets;
        CallInfo *cinfo;

        header = mono_method_get_header (cfg->method);
        sig = mono_method_signature (cfg->method);

        cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);

        cfg->frame_reg = MONO_ARCH_BASEREG;
        offset = 0;

        /* Reserve space to save LMF and caller saved registers */

        if (cfg->method->save_lmf) {
                offset += sizeof (MonoLMF);
        } else {
                /* One word per callee saved register which is actually used */
                if (cfg->used_int_regs & (1 << X86_EBX)) {
                        offset += 4;
                }

                if (cfg->used_int_regs & (1 << X86_EDI)) {
                        offset += 4;
                }

                if (cfg->used_int_regs & (1 << X86_ESI)) {
                        offset += 4;
                }
        }

        switch (cinfo->ret.storage) {
        case ArgValuetypeInReg:
                /* Allocate a local to hold the result, the epilog will copy it to the correct place */
                offset += 8;
                cfg->ret->opcode = OP_REGOFFSET;
                cfg->ret->inst_basereg = X86_EBP;
                cfg->ret->inst_offset = - offset;
                break;
        default:
                break;
        }

        /* Allocate locals */
        offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
        if (locals_stack_align) {
                /* Round the current offset up to the locals' required alignment */
                offset += (locals_stack_align - 1);
                offset &= ~(locals_stack_align - 1);
        }
        /* Locals live below EBP at negative offsets */
        for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
                if (offsets [i] != -1) {
                        MonoInst *inst = cfg->varinfo [i];
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = X86_EBP;
                        inst->inst_offset = - (offset + offsets [i]);
                        //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
                }
        }
        offset += locals_stack_size;


        /*
         * Allocate arguments+return value
         */

        switch (cinfo->ret.storage) {
        case ArgOnStack:
                /* The caller passed the address for the return value on the stack */
                cfg->ret->opcode = OP_REGOFFSET;
                cfg->ret->inst_basereg = X86_EBP;
                cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
                break;
        case ArgValuetypeInReg:
                /* Already given a local above */
                break;
        case ArgInIReg:
                cfg->ret->opcode = OP_REGVAR;
                cfg->ret->inst_c0 = cinfo->ret.reg;
                break;
        case ArgNone:
        case ArgOnFloatFpStack:
        case ArgOnDoubleFpStack:
                break;
        default:
                g_assert_not_reached ();
        }

        if (sig->call_convention == MONO_CALL_VARARG) {
                g_assert (cinfo->sig_cookie.storage == ArgOnStack);
                cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
        }

        /* Incoming arguments live above EBP at their calling-convention offsets */
        for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
                ArgInfo *ainfo = &cinfo->args [i];
                inst = cfg->args [i];
                if (inst->opcode != OP_REGVAR) {
                        inst->opcode = OP_REGOFFSET;
                        inst->inst_basereg = X86_EBP;
                }
                inst->inst_offset = ainfo->offset + ARGS_OFFSET;
        }

        /* Round the total frame size up to the platform frame alignment */
        offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
        offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

        cfg->stack_offset = offset;
}
880
881 void
882 mono_arch_create_vars (MonoCompile *cfg)
883 {
884         MonoMethodSignature *sig;
885         CallInfo *cinfo;
886
887         sig = mono_method_signature (cfg->method);
888
889         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
890
891         if (cinfo->ret.storage == ArgValuetypeInReg)
892                 cfg->ret_var_is_local = TRUE;
893 }
894
895 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
896  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
897  */
898
899 static void
900 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
901 {
902         MonoInst *arg;
903         MonoMethodSignature *tmp_sig;
904         MonoInst *sig_arg;
905
906         /* FIXME: Add support for signature tokens to AOT */
907         cfg->disable_aot = TRUE;
908         MONO_INST_NEW (cfg, arg, OP_OUTARG);
909
910         /*
911          * mono_ArgIterator_Setup assumes the signature cookie is 
912          * passed first and all the arguments which were before it are
913          * passed on the stack after the signature. So compensate by 
914          * passing a different signature.
915          */
916         tmp_sig = mono_metadata_signature_dup (call->signature);
917         tmp_sig->param_count -= call->signature->sentinelpos;
918         tmp_sig->sentinelpos = 0;
919         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
920
921         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
922         sig_arg->inst_p0 = tmp_sig;
923
924         arg->inst_left = sig_arg;
925         arg->type = STACK_PTR;
926         /* prepend, so they get reversed */
927         arg->next = call->out_args;
928         call->out_args = arg;
929 }
930
931 /*
932  * It is expensive to adjust esp for each individual fp argument pushed on the stack
933  * so we try to do it just once when we have multiple fp arguments in a row.
934  * We don't use this mechanism generally because for int arguments the generated code
935  * is slightly bigger and new generation cpus optimize away the dependency chains
936  * created by push instructions on the esp value.
937  * fp_arg_setup is the first argument in the execution sequence where the esp register
938  * is modified.
939  */
940 static int
941 collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
942 {
943         int fp_space = 0;
944         MonoType *t;
945
946         for (; start_arg < sig->param_count; ++start_arg) {
947                 t = mono_type_get_underlying_type (sig->params [start_arg]);
948                 if (!t->byref && t->type == MONO_TYPE_R8) {
949                         fp_space += sizeof (double);
950                         *fp_arg_setup = start_arg;
951                 } else {
952                         break;
953                 }
954         }
955         return fp_space;
956 }
957
958 /* 
959  * take the arguments and generate the arch-specific
960  * instructions to properly call the function in call.
961  * This includes pushing, moving arguments to the right register
962  * etc.
963  */
964 MonoCallInst*
965 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
966         MonoInst *arg, *in;
967         MonoMethodSignature *sig;
968         int i, n;
969         CallInfo *cinfo;
970         int sentinelpos = 0;
971         int fp_args_space = 0, fp_args_offset = 0, fp_arg_setup = -1;
972
973         sig = call->signature;
974         n = sig->param_count + sig->hasthis;
975
976         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
977
978         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
979                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
980
981         for (i = 0; i < n; ++i) {
982                 ArgInfo *ainfo = cinfo->args + i;
983
984                 /* Emit the signature cookie just before the implicit arguments */
985                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
986                         emit_sig_cookie (cfg, call);
987                 }
988
989                 if (is_virtual && i == 0) {
990                         /* the argument will be attached to the call instrucion */
991                         in = call->args [i];
992                 } else {
993                         MonoType *t;
994
995                         if (i >= sig->hasthis)
996                                 t = sig->params [i - sig->hasthis];
997                         else
998                                 t = &mono_defaults.int_class->byval_arg;
999                         t = mono_type_get_underlying_type (t);
1000
1001                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1002                         in = call->args [i];
1003                         arg->cil_code = in->cil_code;
1004                         arg->inst_left = in;
1005                         arg->type = in->type;
1006                         /* prepend, so they get reversed */
1007                         arg->next = call->out_args;
1008                         call->out_args = arg;
1009
1010                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
1011                                 guint32 size, align;
1012
1013                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
1014                                         size = sizeof (MonoTypedRef);
1015                                         align = sizeof (gpointer);
1016                                 }
1017                                 else
1018                                         if (sig->pinvoke)
1019                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
1020                                         else {
1021                                                 int ialign;
1022                                                 size = mini_type_stack_size (cfg->generic_sharing_context, &in->klass->byval_arg, &ialign);
1023                                                 align = ialign;
1024                                         }
1025                                 arg->opcode = OP_OUTARG_VT;
1026                                 arg->klass = in->klass;
1027                                 arg->backend.is_pinvoke = sig->pinvoke;
1028                                 arg->inst_imm = size; 
1029                         }
1030                         else {
1031                                 switch (ainfo->storage) {
1032                                 case ArgOnStack:
1033                                         arg->opcode = OP_OUTARG;
1034                                         if (!t->byref) {
1035                                                 if (t->type == MONO_TYPE_R4) {
1036                                                         arg->opcode = OP_OUTARG_R4;
1037                                                 } else if (t->type == MONO_TYPE_R8) {
1038                                                         arg->opcode = OP_OUTARG_R8;
1039                                                         /* we store in the upper bits of backen.arg_info the needed
1040                                                          * esp adjustment and in the lower bits the offset from esp
1041                                                          * where the arg needs to be stored
1042                                                          */
1043                                                         if (!fp_args_space) {
1044                                                                 fp_args_space = collect_fp_stack_space (sig, i - sig->hasthis, &fp_arg_setup);
1045                                                                 fp_args_offset = fp_args_space;
1046                                                         }
1047                                                         arg->backend.arg_info = fp_args_space - fp_args_offset;
1048                                                         fp_args_offset -= sizeof (double);
1049                                                         if (i - sig->hasthis == fp_arg_setup) {
1050                                                                 arg->backend.arg_info |= fp_args_space << 16;
1051                                                         }
1052                                                         if (fp_args_offset == 0) {
1053                                                                 /* the allocated esp stack is finished:
1054                                                                  * prepare for an eventual second run of fp args
1055                                                                  */
1056                                                                 fp_args_space = 0;
1057                                                         }
1058                                                 }
1059                                         }
1060                                         break;
1061                                 default:
1062                                         g_assert_not_reached ();
1063                                 }
1064                         }
1065                 }
1066         }
1067
1068         /* Handle the case where there are no implicit arguments */
1069         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
1070                 emit_sig_cookie (cfg, call);
1071         }
1072
1073         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1074                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1075                         MonoInst *zero_inst;
1076                         /*
1077                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1078                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1079                          * before calling the function. So we add a dummy instruction to represent pushing the 
1080                          * struct return address to the stack. The return address will be saved to this stack slot 
1081                          * by the code emitted in this_vret_args.
1082                          */
1083                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1084                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1085                         zero_inst->inst_p0 = 0;
1086                         arg->inst_left = zero_inst;
1087                         arg->type = STACK_PTR;
1088                         /* prepend, so they get reversed */
1089                         arg->next = call->out_args;
1090                         call->out_args = arg;
1091                 }
1092                 else
1093                         /* if the function returns a struct, the called method already does a ret $0x4 */
1094                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1095                                 cinfo->stack_usage -= 4;
1096         }
1097         
1098         call->stack_usage = cinfo->stack_usage;
1099
1100 #if defined(__APPLE__)
1101         if (cinfo->need_stack_align) {
1102                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1103                 arg->inst_c0 = cinfo->stack_align_amount;
1104                 arg->next = call->out_args;
1105                 call->out_args = arg;
1106         }
1107 #endif 
1108
1109         return call;
1110 }
1111
1112 /*
1113  * Allow tracing to work with this interface (with an optional argument)
1114  */
1115 void*
1116 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1117 {
1118         guchar *code = p;
1119
1120 #if __APPLE__
1121         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1122 #endif
1123
1124         /* if some args are passed in registers, we need to save them here */
1125         x86_push_reg (code, X86_EBP);
1126
1127         if (cfg->compile_aot) {
1128                 x86_push_imm (code, cfg->method);
1129                 x86_mov_reg_imm (code, X86_EAX, func);
1130                 x86_call_reg (code, X86_EAX);
1131         } else {
1132                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1133                 x86_push_imm (code, cfg->method);
1134                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1135                 x86_call_code (code, 0);
1136         }
1137 #if __APPLE__
1138         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
1139 #else
1140         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1141 #endif
1142
1143         return code;
1144 }
1145
/* How the native return value must be preserved around the epilog
 * instrumentation call (see mono_arch_instrument_epilog) */
enum {
	SAVE_NONE,	/* void return: nothing to save */
	SAVE_STRUCT,	/* valuetype returned through a hidden pointer */
	SAVE_EAX,	/* 32 bit integer / pointer / reference return */
	SAVE_EAX_EDX,	/* 64 bit integer return (EDX:EAX pair) */
	SAVE_FP		/* R4/R8 return on the x87 stack */
};
1153
1154 void*
1155 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1156 {
1157         guchar *code = p;
1158         int arg_size = 0, save_mode = SAVE_NONE;
1159         MonoMethod *method = cfg->method;
1160         
1161         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1162         case MONO_TYPE_VOID:
1163                 /* special case string .ctor icall */
1164                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1165                         save_mode = SAVE_EAX;
1166                 else
1167                         save_mode = SAVE_NONE;
1168                 break;
1169         case MONO_TYPE_I8:
1170         case MONO_TYPE_U8:
1171                 save_mode = SAVE_EAX_EDX;
1172                 break;
1173         case MONO_TYPE_R4:
1174         case MONO_TYPE_R8:
1175                 save_mode = SAVE_FP;
1176                 break;
1177         case MONO_TYPE_GENERICINST:
1178                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1179                         save_mode = SAVE_EAX;
1180                         break;
1181                 }
1182                 /* Fall through */
1183         case MONO_TYPE_VALUETYPE:
1184                 save_mode = SAVE_STRUCT;
1185                 break;
1186         default:
1187                 save_mode = SAVE_EAX;
1188                 break;
1189         }
1190
1191         switch (save_mode) {
1192         case SAVE_EAX_EDX:
1193                 x86_push_reg (code, X86_EDX);
1194                 x86_push_reg (code, X86_EAX);
1195                 if (enable_arguments) {
1196                         x86_push_reg (code, X86_EDX);
1197                         x86_push_reg (code, X86_EAX);
1198                         arg_size = 8;
1199                 }
1200                 break;
1201         case SAVE_EAX:
1202                 x86_push_reg (code, X86_EAX);
1203                 if (enable_arguments) {
1204                         x86_push_reg (code, X86_EAX);
1205                         arg_size = 4;
1206                 }
1207                 break;
1208         case SAVE_FP:
1209                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1210                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1211                 if (enable_arguments) {
1212                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1213                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1214                         arg_size = 8;
1215                 }
1216                 break;
1217         case SAVE_STRUCT:
1218                 if (enable_arguments) {
1219                         x86_push_membase (code, X86_EBP, 8);
1220                         arg_size = 4;
1221                 }
1222                 break;
1223         case SAVE_NONE:
1224         default:
1225                 break;
1226         }
1227
1228         if (cfg->compile_aot) {
1229                 x86_push_imm (code, method);
1230                 x86_mov_reg_imm (code, X86_EAX, func);
1231                 x86_call_reg (code, X86_EAX);
1232         } else {
1233                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1234                 x86_push_imm (code, method);
1235                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1236                 x86_call_code (code, 0);
1237         }
1238         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1239
1240         switch (save_mode) {
1241         case SAVE_EAX_EDX:
1242                 x86_pop_reg (code, X86_EAX);
1243                 x86_pop_reg (code, X86_EDX);
1244                 break;
1245         case SAVE_EAX:
1246                 x86_pop_reg (code, X86_EAX);
1247                 break;
1248         case SAVE_FP:
1249                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1250                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1251                 break;
1252         case SAVE_NONE:
1253         default:
1254                 break;
1255         }
1256
1257         return code;
1258 }
1259
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch with condition code COND (SIGN selects the
 * signed/unsigned variant). If the target label/basic block already has a
 * native offset, branch there directly; otherwise record a patch and emit
 * a branch with a 0 displacement, preferring the short 8-bit form when
 * MONO_OPT_BRANCH is on and the estimated distance fits in an imm8.
 * NOTE(review): expects `code`, `cfg` and `cpos` to be in scope at the
 * expansion site.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1284
1285 /*  
1286  *      Emit an exception if condition is fail and
1287  *  if possible do a directly branch to target 
1288  */
1289 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
1290         do {                                                        \
1291                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
1292                 if (tins == NULL) {                                                                             \
1293                         mono_add_patch_info (cfg, code - cfg->native_code,   \
1294                                         MONO_PATCH_INFO_EXC, exc_name);  \
1295                         x86_branch32 (code, cond, 0, signed);               \
1296                 } else {        \
1297                         EMIT_COND_BRANCH (tins, cond, signed);  \
1298                 }                       \
1299         } while (0); 
1300
/*
 * EMIT_FPCOMPARE:
 *
 *   Compare the two top x87 stack entries with fcompp (popping both) and
 * latch the FPU status word with fnstsw so the result can be tested with
 * integer flag instructions afterwards.
 * NOTE(review): like EMIT_COND_SYSTEM_EXCEPTION, the trailing semicolon
 * after `while (0)` makes this unsafe in unbraced if/else bodies.
 */
#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0); 
1305
1306
/*
 * emit_call:
 *
 *   Emit a call whose target is not yet known: a patch of PATCH_TYPE/DATA
 * is recorded at the current offset and the call is emitted with a 0
 * displacement, to be fixed up when the patch is applied.
 * Returns the updated native code pointer.
 */
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}
1315
/* TRUE when OPCODE does not read the carry flag, i.e. it is none of the
 * add/sub-with-carry forms; used by the peephole pass to decide whether a
 * flag-clobbering rewrite (e.g. xor reg,reg for 0) is safe before it */
#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
1317
1318 /*
1319  * peephole_pass_1:
1320  *
1321  *   Perform peephole opts which should/can be performed before local regalloc
1322  */
1323 static void
1324 peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
1325 {
1326         MonoInst *ins, *last_ins = NULL;
1327         ins = bb->code;
1328
1329         while (ins) {
1330                 switch (ins->opcode) {
1331                 case OP_IADD_IMM:
1332                 case OP_ADD_IMM:
1333                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1334                                 /* 
1335                                  * X86_LEA is like ADD, but doesn't have the
1336                                  * sreg1==dreg restriction.
1337                                  */
1338                                 ins->opcode = OP_X86_LEA_MEMBASE;
1339                                 ins->inst_basereg = ins->sreg1;
1340                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1341                                 ins->opcode = OP_X86_INC_REG;
1342                         break;
1343                 case OP_SUB_IMM:
1344                 case OP_ISUB_IMM:
1345                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1346                                 ins->opcode = OP_X86_LEA_MEMBASE;
1347                                 ins->inst_basereg = ins->sreg1;
1348                                 ins->inst_imm = -ins->inst_imm;
1349                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1350                                 ins->opcode = OP_X86_DEC_REG;
1351                         break;
1352                 case OP_COMPARE_IMM:
1353                 case OP_ICOMPARE_IMM:
1354                         /* OP_COMPARE_IMM (reg, 0) 
1355                          * --> 
1356                          * OP_X86_TEST_NULL (reg) 
1357                          */
1358                         if (!ins->inst_imm)
1359                                 ins->opcode = OP_X86_TEST_NULL;
1360                         break;
1361                 case OP_X86_COMPARE_MEMBASE_IMM:
1362                         /* 
1363                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1364                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1365                          * -->
1366                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1367                          * OP_COMPARE_IMM reg, imm
1368                          *
1369                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1370                          */
1371                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1372                             ins->inst_basereg == last_ins->inst_destbasereg &&
1373                             ins->inst_offset == last_ins->inst_offset) {
1374                                         ins->opcode = OP_COMPARE_IMM;
1375                                         ins->sreg1 = last_ins->sreg1;
1376
1377                                         /* check if we can remove cmp reg,0 with test null */
1378                                         if (!ins->inst_imm)
1379                                                 ins->opcode = OP_X86_TEST_NULL;
1380                                 }
1381
1382                         break;
1383                 case OP_LOAD_MEMBASE:
1384                 case OP_LOADI4_MEMBASE:
1385                         /* 
1386                          * Note: if reg1 = reg2 the load op is removed
1387                          *
1388                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1389                          * OP_LOAD_MEMBASE offset(basereg), reg2
1390                          * -->
1391                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1392                          * OP_MOVE reg1, reg2
1393                          */
1394                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1395                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1396                             ins->inst_basereg == last_ins->inst_destbasereg &&
1397                             ins->inst_offset == last_ins->inst_offset) {
1398                                 if (ins->dreg == last_ins->sreg1) {
1399                                         last_ins->next = ins->next;                             
1400                                         ins = ins->next;                                
1401                                         continue;
1402                                 } else {
1403                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1404                                         ins->opcode = OP_MOVE;
1405                                         ins->sreg1 = last_ins->sreg1;
1406                                 }
1407
1408                         /* 
1409                          * Note: reg1 must be different from the basereg in the second load
1410                          * Note: if reg1 = reg2 is equal then second load is removed
1411                          *
1412                          * OP_LOAD_MEMBASE offset(basereg), reg1
1413                          * OP_LOAD_MEMBASE offset(basereg), reg2
1414                          * -->
1415                          * OP_LOAD_MEMBASE offset(basereg), reg1
1416                          * OP_MOVE reg1, reg2
1417                          */
1418                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1419                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1420                               ins->inst_basereg != last_ins->dreg &&
1421                               ins->inst_basereg == last_ins->inst_basereg &&
1422                               ins->inst_offset == last_ins->inst_offset) {
1423
1424                                 if (ins->dreg == last_ins->dreg) {
1425                                         last_ins->next = ins->next;                             
1426                                         ins = ins->next;                                
1427                                         continue;
1428                                 } else {
1429                                         ins->opcode = OP_MOVE;
1430                                         ins->sreg1 = last_ins->dreg;
1431                                 }
1432
1433                                 //g_assert_not_reached ();
1434
1435 #if 0
1436                         /* 
1437                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1438                          * OP_LOAD_MEMBASE offset(basereg), reg
1439                          * -->
1440                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1441                          * OP_ICONST reg, imm
1442                          */
1443                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1444                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1445                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1446                                    ins->inst_offset == last_ins->inst_offset) {
1447                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1448                                 ins->opcode = OP_ICONST;
1449                                 ins->inst_c0 = last_ins->inst_imm;
1450                                 g_assert_not_reached (); // check this rule
1451 #endif
1452                         }
1453                         break;
1454                 case OP_LOADU1_MEMBASE:
1455                 case OP_LOADI1_MEMBASE:
1456                         /* 
1457                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1458                          * OP_LOAD_MEMBASE offset(basereg), reg2
1459                          * -->
1460                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1461                          * CONV_I2/U2 reg1, reg2
1462                          */
1463                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1464                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1465                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1466                                         ins->inst_offset == last_ins->inst_offset) {
1467                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1468                                 ins->sreg1 = last_ins->sreg1;
1469                         }
1470                         break;
1471                 case OP_LOADU2_MEMBASE:
1472                 case OP_LOADI2_MEMBASE:
1473                         /* 
1474                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1475                          * OP_LOAD_MEMBASE offset(basereg), reg2
1476                          * -->
1477                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1478                          * CONV_I2/U2 reg1, reg2
1479                          */
1480                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1481                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1482                                         ins->inst_offset == last_ins->inst_offset) {
1483                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1484                                 ins->sreg1 = last_ins->sreg1;
1485                         }
1486                         break;
1487                 case CEE_CONV_I4:
1488                 case CEE_CONV_U4:
1489                 case OP_ICONV_TO_I4:
1490                 case OP_MOVE:
1491                         /*
1492                          * Removes:
1493                          *
1494                          * OP_MOVE reg, reg 
1495                          */
1496                         if (ins->dreg == ins->sreg1) {
1497                                 if (last_ins)
1498                                         last_ins->next = ins->next;                             
1499                                 ins = ins->next;
1500                                 continue;
1501                         }
1502                         /* 
1503                          * Removes:
1504                          *
1505                          * OP_MOVE sreg, dreg 
1506                          * OP_MOVE dreg, sreg
1507                          */
1508                         if (last_ins && last_ins->opcode == OP_MOVE &&
1509                             ins->sreg1 == last_ins->dreg &&
1510                             ins->dreg == last_ins->sreg1) {
1511                                 last_ins->next = ins->next;                             
1512                                 ins = ins->next;                                
1513                                 continue;
1514                         }
1515                         break;
1516                         
1517                 case OP_X86_PUSH_MEMBASE:
1518                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1519                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1520                             ins->inst_basereg == last_ins->inst_destbasereg &&
1521                             ins->inst_offset == last_ins->inst_offset) {
1522                                     ins->opcode = OP_X86_PUSH;
1523                                     ins->sreg1 = last_ins->sreg1;
1524                         }
1525                         break;
1526                 }
1527                 last_ins = ins;
1528                 ins = ins->next;
1529         }
1530         bb->last_ins = last_ins;
1531 }
1532
1533 static void
1534 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1535 {
1536         MonoInst *ins, *last_ins = NULL;
1537         ins = bb->code;
1538
1539         while (ins) {
1540
1541                 switch (ins->opcode) {
1542                 case OP_ICONST:
1543                         /* reg = 0 -> XOR (reg, reg) */
1544                         /* XOR sets cflags on x86, so we cant do it always */
1545                         if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
1546                                 MonoInst *ins2;
1547
1548                                 ins->opcode = OP_IXOR;
1549                                 ins->sreg1 = ins->dreg;
1550                                 ins->sreg2 = ins->dreg;
1551
1552                                 /* 
1553                                  * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
1554                                  * since it takes 3 bytes instead of 7.
1555                                  */
1556                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
1557                                         if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1558                                                 ins2->opcode = OP_STORE_MEMBASE_REG;
1559                                                 ins2->sreg1 = ins->dreg;
1560                                         }
1561                                         else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1562                                                 ins2->opcode = OP_STOREI4_MEMBASE_REG;
1563                                                 ins2->sreg1 = ins->dreg;
1564                                         }
1565                                         else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
1566                                                 /* Continue iteration */
1567                                         }
1568                                         else
1569                                                 break;
1570                                 }
1571                         }
1572                         break;
1573                 case OP_IADD_IMM:
1574                 case OP_ADD_IMM:
1575                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1576                                 ins->opcode = OP_X86_INC_REG;
1577                         break;
1578                 case OP_ISUB_IMM:
1579                 case OP_SUB_IMM:
1580                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1581                                 ins->opcode = OP_X86_DEC_REG;
1582                         break;
1583                 case OP_X86_COMPARE_MEMBASE_IMM:
1584                         /* 
1585                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1586                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1587                          * -->
1588                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1589                          * OP_COMPARE_IMM reg, imm
1590                          *
1591                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1592                          */
1593                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1594                             ins->inst_basereg == last_ins->inst_destbasereg &&
1595                             ins->inst_offset == last_ins->inst_offset) {
1596                                         ins->opcode = OP_COMPARE_IMM;
1597                                         ins->sreg1 = last_ins->sreg1;
1598
1599                                         /* check if we can remove cmp reg,0 with test null */
1600                                         if (!ins->inst_imm)
1601                                                 ins->opcode = OP_X86_TEST_NULL;
1602                                 }
1603
1604                         break;
1605                 case OP_LOAD_MEMBASE:
1606                 case OP_LOADI4_MEMBASE:
1607                         /* 
1608                          * Note: if reg1 = reg2 the load op is removed
1609                          *
1610                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1611                          * OP_LOAD_MEMBASE offset(basereg), reg2
1612                          * -->
1613                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1614                          * OP_MOVE reg1, reg2
1615                          */
1616                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1617                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1618                             ins->inst_basereg == last_ins->inst_destbasereg &&
1619                             ins->inst_offset == last_ins->inst_offset) {
1620                                 if (ins->dreg == last_ins->sreg1) {
1621                                         last_ins->next = ins->next;                             
1622                                         ins = ins->next;                                
1623                                         continue;
1624                                 } else {
1625                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1626                                         ins->opcode = OP_MOVE;
1627                                         ins->sreg1 = last_ins->sreg1;
1628                                 }
1629
1630                         /* 
1631                          * Note: reg1 must be different from the basereg in the second load
1632                          * Note: if reg1 = reg2 is equal then second load is removed
1633                          *
1634                          * OP_LOAD_MEMBASE offset(basereg), reg1
1635                          * OP_LOAD_MEMBASE offset(basereg), reg2
1636                          * -->
1637                          * OP_LOAD_MEMBASE offset(basereg), reg1
1638                          * OP_MOVE reg1, reg2
1639                          */
1640                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1641                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1642                               ins->inst_basereg != last_ins->dreg &&
1643                               ins->inst_basereg == last_ins->inst_basereg &&
1644                               ins->inst_offset == last_ins->inst_offset) {
1645
1646                                 if (ins->dreg == last_ins->dreg) {
1647                                         last_ins->next = ins->next;                             
1648                                         ins = ins->next;                                
1649                                         continue;
1650                                 } else {
1651                                         ins->opcode = OP_MOVE;
1652                                         ins->sreg1 = last_ins->dreg;
1653                                 }
1654
1655                                 //g_assert_not_reached ();
1656
1657 #if 0
1658                         /* 
1659                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1660                          * OP_LOAD_MEMBASE offset(basereg), reg
1661                          * -->
1662                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1663                          * OP_ICONST reg, imm
1664                          */
1665                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1666                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1667                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1668                                    ins->inst_offset == last_ins->inst_offset) {
1669                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1670                                 ins->opcode = OP_ICONST;
1671                                 ins->inst_c0 = last_ins->inst_imm;
1672                                 g_assert_not_reached (); // check this rule
1673 #endif
1674                         }
1675                         break;
1676                 case OP_LOADU1_MEMBASE:
1677                 case OP_LOADI1_MEMBASE:
1678                         /* 
1679                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1680                          * OP_LOAD_MEMBASE offset(basereg), reg2
1681                          * -->
1682                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1683                          * CONV_I2/U2 reg1, reg2
1684                          */
1685                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1686                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1687                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1688                                         ins->inst_offset == last_ins->inst_offset) {
1689                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1690                                 ins->sreg1 = last_ins->sreg1;
1691                         }
1692                         break;
1693                 case OP_LOADU2_MEMBASE:
1694                 case OP_LOADI2_MEMBASE:
1695                         /* 
1696                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1697                          * OP_LOAD_MEMBASE offset(basereg), reg2
1698                          * -->
1699                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1700                          * CONV_I2/U2 reg1, reg2
1701                          */
1702                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1703                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1704                                         ins->inst_offset == last_ins->inst_offset) {
1705                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1706                                 ins->sreg1 = last_ins->sreg1;
1707                         }
1708                         break;
1709                 case CEE_CONV_I4:
1710                 case CEE_CONV_U4:
1711                 case OP_ICONV_TO_I4:
1712                 case OP_MOVE:
1713                         /*
1714                          * Removes:
1715                          *
1716                          * OP_MOVE reg, reg 
1717                          */
1718                         if (ins->dreg == ins->sreg1) {
1719                                 if (last_ins)
1720                                         last_ins->next = ins->next;                             
1721                                 ins = ins->next;
1722                                 continue;
1723                         }
1724                         /* 
1725                          * Removes:
1726                          *
1727                          * OP_MOVE sreg, dreg 
1728                          * OP_MOVE dreg, sreg
1729                          */
1730                         if (last_ins && last_ins->opcode == OP_MOVE &&
1731                             ins->sreg1 == last_ins->dreg &&
1732                             ins->dreg == last_ins->sreg1) {
1733                                 last_ins->next = ins->next;                             
1734                                 ins = ins->next;                                
1735                                 continue;
1736                         }
1737                         break;
1738                 case OP_X86_PUSH_MEMBASE:
1739                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1740                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1741                             ins->inst_basereg == last_ins->inst_destbasereg &&
1742                             ins->inst_offset == last_ins->inst_offset) {
1743                                     ins->opcode = OP_X86_PUSH;
1744                                     ins->sreg1 = last_ins->sreg1;
1745                         }
1746                         break;
1747                 }
1748                 last_ins = ins;
1749                 ins = ins->next;
1750         }
1751         bb->last_ins = last_ins;
1752 }
1753
/*
 * Maps branch opcodes to the x86 condition code used to emit the branch.
 * NOTE(review): the row layout (five equality/relational entries, a second
 * run of five, then overflow/carry variants) presumably mirrors the branch
 * opcode numbering -- confirm against the opcode definitions in mini.h.
 */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};

/*
 * Parallel to cc_table: TRUE when the corresponding comparison is signed
 * (the emitter selects the signed vs. unsigned x86 condition encoding
 * based on this flag).
 */
static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};
1773
1774 void
1775 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1776 {
1777         if (cfg->opt & MONO_OPT_PEEPHOLE)
1778                 peephole_pass_1 (cfg, bb);
1779
1780         mono_local_regalloc (cfg, bb);
1781 }
1782
/*
 * emit_float_to_int:
 *
 *   Emit native code converting the value on top of the x87 FP stack to an
 * integer of SIZE bytes (1, 2, 4 or 8), leaving the result in DREG and
 * popping the FP stack. IS_SIGNED selects sign- vs zero-extension for the
 * sub-int sizes. Returns the updated native code pointer.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
	if (cfg->opt & MONO_OPT_SSE2 && size < 8) {
		/* optimize by assigning a local var for this use so we avoid
		 * the stack manipulations */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
		/* cvttsd2si truncates toward zero, so no control word change needed */
		x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		if (size == 1)
			x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
		else if (size == 2)
			x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
		return code;
	}
	/* x87 path: save the FPU control word on the stack and switch the
	 * rounding mode to truncation for the duration of the conversion */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	/* 0xc00: set both rounding-control bits (RC=11b -> round toward zero) */
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
		/* NOTE(review): with the high dword left un-popped, ESP here
		 * appears 4 bytes off for the control-word restore below --
		 * confirm this path is actually reachable before relying on it */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* restore the saved control word and release its stack slot */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
1828
/*
 * mono_emit_stack_alloc:
 *
 *   Emit code for a dynamic stack allocation (localloc): subtract
 * TREE->sreg1 bytes from ESP, touching the stack one page at a time where
 * the platform requires it (Windows, or builds handling SIGSEGV on an
 * altstack), and zero-initialize the allocated area when MONO_INST_INIT is
 * set. Returns the updated native code pointer.
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently commited.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		/* Sizes below one page skip the probe loop entirely */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/* 
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);				
			/* +12 skips the three saved registers pushed above */
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		/* Save only the scratch registers not already holding the
		 * destination or size, tracking how far the saves moved ESP */
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}
		
		/* rep stosl zeroes sreg>>2 dwords starting at the allocation */
		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
				
		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);
		
		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
1935
1936
1937 static guint8*
1938 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1939 {
1940         CallInfo *cinfo;
1941         int quad;
1942
1943         /* Move return value to the target register */
1944         switch (ins->opcode) {
1945         case CEE_CALL:
1946         case OP_CALL_REG:
1947         case OP_CALL_MEMBASE:
1948                 if (ins->dreg != X86_EAX)
1949                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1950                 break;
1951         case OP_VCALL:
1952         case OP_VCALL_REG:
1953         case OP_VCALL_MEMBASE:
1954                 cinfo = get_call_info (cfg, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
1955                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1956                         /* Pop the destination address from the stack */
1957                         x86_pop_reg (code, X86_ECX);
1958                         
1959                         for (quad = 0; quad < 2; quad ++) {
1960                                 switch (cinfo->ret.pair_storage [quad]) {
1961                                 case ArgInIReg:
1962                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1963                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1964                                         break;
1965                                 case ArgNone:
1966                                         break;
1967                                 default:
1968                                         g_assert_not_reached ();
1969                                 }
1970                         }
1971                 }
1972         default:
1973                 break;
1974         }
1975
1976         return code;
1977 }
1978
/*
 * emit_tls_get:
 * @code: buffer to store code to
 * @dreg: hard register where to place the result
 * @tls_offset: offset info
 *
 * emit_tls_get emits in @code the native code that puts in the dreg register
 * the item in the thread local storage identified by tls_offset.
 *
 * Returns: a pointer to the end of the stored code
 */
static guint8*
emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/* 
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	/* Only the first 64 TLS slots live inline in the TEB; larger indices
	 * would require the expansion-slot path, which is not emitted here */
	g_assert (tls_offset < 64);
	x86_prefix (code, X86_FS_PREFIX);
	/* fs:[0x18] is the TEB self pointer (linear address of the TEB) */
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	/* NOTE(review): presumably clears the LastErrorValue field at TEB
	 * offset 0x34, mirroring TlsGetValue -- confirm against TEB layout */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	/* 3600 (0xe10) is the offset of the TlsSlots array within the TEB */
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	if (optimize_for_xen) {
		/* NOTE(review): under Xen, direct gs-relative loads at nonzero
		 * offsets are avoided; instead the thread pointer is read from
		 * gs:[0] and dereferenced -- see the optimize_for_xen flag */
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, 0, 4);
		x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
	} else {
		/* Linux/ELF TLS: load directly from gs:[tls_offset] */
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, tls_offset, 4);
	}
#endif
	return code;
}
2016
2017 /*
2018  * emit_load_volatile_arguments:
2019  *
2020  *  Load volatile arguments from the stack to the original input registers.
2021  * Required before a tail call.
2022  */
2023 static guint8*
2024 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2025 {
2026         MonoMethod *method = cfg->method;
2027         MonoMethodSignature *sig;
2028         MonoInst *inst;
2029         CallInfo *cinfo;
2030         guint32 i;
2031
2032         /* FIXME: Generate intermediate code instead */
2033
2034         sig = mono_method_signature (method);
2035
2036         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
2037         
2038         /* This is the opposite of the code in emit_prolog */
2039
2040         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2041                 ArgInfo *ainfo = cinfo->args + i;
2042                 MonoType *arg_type;
2043                 inst = cfg->args [i];
2044
2045                 if (sig->hasthis && (i == 0))
2046                         arg_type = &mono_defaults.object_class->byval_arg;
2047                 else
2048                         arg_type = sig->params [i - sig->hasthis];
2049
2050                 /*
2051                  * On x86, the arguments are either in their original stack locations, or in
2052                  * global regs.
2053                  */
2054                 if (inst->opcode == OP_REGVAR) {
2055                         g_assert (ainfo->storage == ArgOnStack);
2056                         
2057                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
2058                 }
2059         }
2060
2061         return code;
2062 }
2063
/*
 * REAL_PRINT_REG:
 *
 *   Debugging helper: emits code that prints TEXT, the register index and
 * the register's runtime value via printf. EAX/EDX/ECX are saved around the
 * call; the ADD ESP,3*4 cleans the three printf arguments (format string,
 * register index, register value) off the stack before the restores.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
2078
/* benchmark and set based on cpu */
/* Byte alignment applied to basic blocks that start a loop body */
#define LOOP_ALIGNMENT 8
/* A bb is a loop start when it begins a loop body and is nested in one */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2082
2083 void
2084 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2085 {
2086         MonoInst *ins;
2087         MonoCallInst *call;
2088         guint offset;
2089         guint8 *code = cfg->native_code + cfg->code_len;
2090         MonoInst *last_ins = NULL;
2091         guint last_offset = 0;
2092         int max_len, cpos;
2093
2094         if (cfg->opt & MONO_OPT_PEEPHOLE)
2095                 peephole_pass (cfg, bb);
2096
2097         if (cfg->opt & MONO_OPT_LOOP) {
2098                 int pad, align = LOOP_ALIGNMENT;
2099                 /* set alignment depending on cpu */
2100                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2101                         pad = align - pad;
2102                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2103                         x86_padding (code, pad);
2104                         cfg->code_len += pad;
2105                         bb->native_offset = cfg->code_len;
2106                 }
2107         }
2108
2109         if (cfg->verbose_level > 2)
2110                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2111
2112         cpos = bb->max_offset;
2113
2114         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2115                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2116                 g_assert (!cfg->compile_aot);
2117                 cpos += 6;
2118
2119                 cov->data [bb->dfn].cil_code = bb->cil_code;
2120                 /* this is not thread save, but good enough */
2121                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2122         }
2123
2124         offset = code - cfg->native_code;
2125
2126         mono_debug_open_block (cfg, bb, offset);
2127
2128         ins = bb->code;
2129         while (ins) {
2130                 offset = code - cfg->native_code;
2131
2132                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2133
2134                 if (offset > (cfg->code_size - max_len - 16)) {
2135                         cfg->code_size *= 2;
2136                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2137                         code = cfg->native_code + offset;
2138                         mono_jit_stats.code_reallocs++;
2139                 }
2140
2141                 mono_debug_record_line_number (cfg, ins, offset);
2142
2143                 switch (ins->opcode) {
2144                 case OP_BIGMUL:
2145                         x86_mul_reg (code, ins->sreg2, TRUE);
2146                         break;
2147                 case OP_BIGMUL_UN:
2148                         x86_mul_reg (code, ins->sreg2, FALSE);
2149                         break;
2150                 case OP_X86_SETEQ_MEMBASE:
2151                 case OP_X86_SETNE_MEMBASE:
2152                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2153                                          ins->inst_basereg, ins->inst_offset, TRUE);
2154                         break;
2155                 case OP_STOREI1_MEMBASE_IMM:
2156                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2157                         break;
2158                 case OP_STOREI2_MEMBASE_IMM:
2159                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2160                         break;
2161                 case OP_STORE_MEMBASE_IMM:
2162                 case OP_STOREI4_MEMBASE_IMM:
2163                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2164                         break;
2165                 case OP_STOREI1_MEMBASE_REG:
2166                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2167                         break;
2168                 case OP_STOREI2_MEMBASE_REG:
2169                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2170                         break;
2171                 case OP_STORE_MEMBASE_REG:
2172                 case OP_STOREI4_MEMBASE_REG:
2173                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2174                         break;
2175                 case CEE_LDIND_I:
2176                 case CEE_LDIND_I4:
2177                 case CEE_LDIND_U4:
2178                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2179                         break;
2180                 case OP_LOADU4_MEM:
2181                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2182                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2183                         break;
2184                 case OP_LOAD_MEMBASE:
2185                 case OP_LOADI4_MEMBASE:
2186                 case OP_LOADU4_MEMBASE:
2187                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2188                         break;
2189                 case OP_LOADU1_MEMBASE:
2190                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2191                         break;
2192                 case OP_LOADI1_MEMBASE:
2193                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2194                         break;
2195                 case OP_LOADU2_MEMBASE:
2196                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2197                         break;
2198                 case OP_LOADI2_MEMBASE:
2199                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2200                         break;
2201                 case CEE_CONV_I1:
2202                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2203                         break;
2204                 case CEE_CONV_I2:
2205                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2206                         break;
2207                 case CEE_CONV_U1:
2208                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2209                         break;
2210                 case CEE_CONV_U2:
2211                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2212                         break;
2213                 case OP_COMPARE:
2214                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2215                         break;
2216                 case OP_COMPARE_IMM:
2217                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2218                         break;
2219                 case OP_X86_COMPARE_MEMBASE_REG:
2220                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2221                         break;
2222                 case OP_X86_COMPARE_MEMBASE_IMM:
2223                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2224                         break;
2225                 case OP_X86_COMPARE_MEMBASE8_IMM:
2226                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2227                         break;
2228                 case OP_X86_COMPARE_REG_MEMBASE:
2229                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2230                         break;
2231                 case OP_X86_COMPARE_MEM_IMM:
2232                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2233                         break;
2234                 case OP_X86_TEST_NULL:
2235                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2236                         break;
2237                 case OP_X86_ADD_MEMBASE_IMM:
2238                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2239                         break;
2240                 case OP_X86_ADD_MEMBASE:
2241                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2242                         break;
2243                 case OP_X86_SUB_MEMBASE_IMM:
2244                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2245                         break;
2246                 case OP_X86_SUB_MEMBASE:
2247                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2248                         break;
2249                 case OP_X86_AND_MEMBASE_IMM:
2250                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2251                         break;
2252                 case OP_X86_OR_MEMBASE_IMM:
2253                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2254                         break;
2255                 case OP_X86_XOR_MEMBASE_IMM:
2256                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2257                         break;
2258                 case OP_X86_INC_MEMBASE:
2259                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2260                         break;
2261                 case OP_X86_INC_REG:
2262                         x86_inc_reg (code, ins->dreg);
2263                         break;
2264                 case OP_X86_DEC_MEMBASE:
2265                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2266                         break;
2267                 case OP_X86_DEC_REG:
2268                         x86_dec_reg (code, ins->dreg);
2269                         break;
2270                 case OP_X86_MUL_MEMBASE:
2271                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2272                         break;
2273                 case OP_BREAK:
2274                         x86_breakpoint (code);
2275                         break;
2276                 case OP_ADDCC:
2277                 case CEE_ADD:
2278                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2279                         break;
2280                 case OP_ADC:
2281                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2282                         break;
2283                 case OP_ADDCC_IMM:
2284                 case OP_ADD_IMM:
2285                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2286                         break;
2287                 case OP_ADC_IMM:
2288                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2289                         break;
2290                 case OP_SUBCC:
2291                 case CEE_SUB:
2292                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2293                         break;
2294                 case OP_SBB:
2295                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2296                         break;
2297                 case OP_SUBCC_IMM:
2298                 case OP_SUB_IMM:
2299                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2300                         break;
2301                 case OP_SBB_IMM:
2302                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2303                         break;
2304                 case CEE_AND:
2305                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2306                         break;
2307                 case OP_AND_IMM:
2308                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2309                         break;
2310                 case CEE_DIV:
2311                         x86_cdq (code);
2312                         x86_div_reg (code, ins->sreg2, TRUE);
2313                         break;
2314                 case CEE_DIV_UN:
2315                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2316                         x86_div_reg (code, ins->sreg2, FALSE);
2317                         break;
2318                 case OP_DIV_IMM:
2319                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2320                         x86_cdq (code);
2321                         x86_div_reg (code, ins->sreg2, TRUE);
2322                         break;
2323                 case CEE_REM:
2324                         x86_cdq (code);
2325                         x86_div_reg (code, ins->sreg2, TRUE);
2326                         break;
2327                 case CEE_REM_UN:
2328                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2329                         x86_div_reg (code, ins->sreg2, FALSE);
2330                         break;
2331                 case OP_REM_IMM:
2332                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2333                         x86_cdq (code);
2334                         x86_div_reg (code, ins->sreg2, TRUE);
2335                         break;
2336                 case CEE_OR:
2337                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2338                         break;
2339                 case OP_OR_IMM:
2340                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2341                         break;
2342                 case CEE_XOR:
2343                 case OP_IXOR:
2344                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2345                         break;
2346                 case OP_XOR_IMM:
2347                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2348                         break;
2349                 case CEE_SHL:
2350                         g_assert (ins->sreg2 == X86_ECX);
2351                         x86_shift_reg (code, X86_SHL, ins->dreg);
2352                         break;
2353                 case CEE_SHR:
2354                         g_assert (ins->sreg2 == X86_ECX);
2355                         x86_shift_reg (code, X86_SAR, ins->dreg);
2356                         break;
2357                 case OP_SHR_IMM:
2358                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2359                         break;
2360                 case OP_SHR_UN_IMM:
2361                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2362                         break;
2363                 case CEE_SHR_UN:
2364                         g_assert (ins->sreg2 == X86_ECX);
2365                         x86_shift_reg (code, X86_SHR, ins->dreg);
2366                         break;
2367                 case OP_SHL_IMM:
2368                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2369                         break;
		case OP_LSHL: {
			/* 64 bit shift left: the low word is in sreg1, the high word in
			 * backend.reg3, and the shift count in ECX (the g_asserts on the
			 * 32 bit shift opcodes above show ECX is the canonical count reg). */
			guint8 *jump_to_end;

			/* handle shifts below 32 bits */
			x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
			x86_shift_reg (code, X86_SHL, ins->sreg1);

			/* hardware shifts only use the count mod 32, so test bit 5 of the
			 * count to detect shifts in the 32-63 range and fix up the result */
			x86_test_reg_imm (code, X86_ECX, 32);
			jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);

			/* handle shift over 32 bit */
			x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
			x86_clear_reg (code, ins->sreg1);
			
			x86_patch (jump_to_end, code);
			}
			break;
		case OP_LSHR: {
			/* 64 bit arithmetic shift right; same register layout as OP_LSHL */
			guint8 *jump_to_end;

			/* handle shifts below 32 bits */
			x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
			x86_shift_reg (code, X86_SAR, ins->backend.reg3);

			x86_test_reg_imm (code, X86_ECX, 32);
			jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);

			/* handle shifts over 31 bits */
			x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
			/* fill the high word with copies of the sign bit */
			x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
			
			x86_patch (jump_to_end, code);
			}
			break;
		case OP_LSHR_UN: {
			/* 64 bit logical shift right; same register layout as OP_LSHL */
			guint8 *jump_to_end;

			/* handle shifts below 32 bits */
			x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
			x86_shift_reg (code, X86_SHR, ins->backend.reg3);

			x86_test_reg_imm (code, X86_ECX, 32);
			jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);

			/* handle shifts over 31 bits */
			x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
			/* the high word becomes zero */
			x86_clear_reg (code, ins->backend.reg3);
			
			x86_patch (jump_to_end, code);
			}
			break;
		case OP_LSHL_IMM:
			/* 64 bit shift left by a constant: low word in sreg1, high word
			 * in backend.reg3 (mirrors the variable-count OP_LSHL above) */
			if (ins->inst_imm >= 32) {
				/* the low word moves entirely into the high word */
				x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
				x86_clear_reg (code, ins->sreg1);
				x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
			} else {
				/* SHLD shifts bits from the low word into the high word */
				x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
				x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
			}
			break;
		case OP_LSHR_IMM:
			/* 64 bit arithmetic shift right by a constant */
			if (ins->inst_imm >= 32) {
				x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
				/* fill the high word with copies of the sign bit */
				x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
				x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
			} else {
				/* SHRD shifts bits from the high word into the low word */
				x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
				x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
			}
			break;
		case OP_LSHR_UN_IMM:
			/* 64 bit logical shift right by a constant */
			if (ins->inst_imm >= 32) {
				x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
				/* the high word becomes zero */
				x86_clear_reg (code, ins->backend.reg3);
				x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
			} else {
				x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
				x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
			}
			break;
2451                 case CEE_NOT:
2452                         x86_not_reg (code, ins->sreg1);
2453                         break;
2454                 case CEE_NEG:
2455                         x86_neg_reg (code, ins->sreg1);
2456                         break;
2457                 case OP_SEXT_I1:
2458                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2459                         break;
2460                 case OP_SEXT_I2:
2461                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2462                         break;
2463                 case CEE_MUL:
2464                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2465                         break;
2466                 case OP_MUL_IMM:
2467                         switch (ins->inst_imm) {
2468                         case 2:
2469                                 /* MOV r1, r2 */
2470                                 /* ADD r1, r1 */
2471                                 if (ins->dreg != ins->sreg1)
2472                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2473                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2474                                 break;
2475                         case 3:
2476                                 /* LEA r1, [r2 + r2*2] */
2477                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2478                                 break;
2479                         case 5:
2480                                 /* LEA r1, [r2 + r2*4] */
2481                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2482                                 break;
2483                         case 6:
2484                                 /* LEA r1, [r2 + r2*2] */
2485                                 /* ADD r1, r1          */
2486                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2487                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2488                                 break;
2489                         case 9:
2490                                 /* LEA r1, [r2 + r2*8] */
2491                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2492                                 break;
2493                         case 10:
2494                                 /* LEA r1, [r2 + r2*4] */
2495                                 /* ADD r1, r1          */
2496                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2497                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2498                                 break;
2499                         case 12:
2500                                 /* LEA r1, [r2 + r2*2] */
2501                                 /* SHL r1, 2           */
2502                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2503                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2504                                 break;
2505                         case 25:
2506                                 /* LEA r1, [r2 + r2*4] */
2507                                 /* LEA r1, [r1 + r1*4] */
2508                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2509                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2510                                 break;
2511                         case 100:
2512                                 /* LEA r1, [r2 + r2*4] */
2513                                 /* SHL r1, 2           */
2514                                 /* LEA r1, [r1 + r1*4] */
2515                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2516                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2517                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2518                                 break;
2519                         default:
2520                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2521                                 break;
2522                         }
2523                         break;
2524                 case CEE_MUL_OVF:
2525                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2526                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2527                         break;
		case CEE_MUL_OVF_UN: {
			/* the mul operation and the exception check should most likely be split */
			int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
			/*g_assert (ins->sreg2 == X86_EAX);
			g_assert (ins->dreg == X86_EAX);*/
			/* MUL takes a single explicit operand: the other factor must be in
			 * EAX and the 64 bit result lands in EDX:EAX.  Route one source
			 * into EAX and preserve any live EAX/EDX values around the MUL. */
			if (ins->sreg2 == X86_EAX) {
				non_eax_reg = ins->sreg1;
			} else if (ins->sreg1 == X86_EAX) {
				non_eax_reg = ins->sreg2;
			} else {
				/* no need to save since we're going to store to it anyway */
				if (ins->dreg != X86_EAX) {
					saved_eax = TRUE;
					x86_push_reg (code, X86_EAX);
				}
				x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
				non_eax_reg = ins->sreg2;
			}
			if (ins->dreg == X86_EDX) {
				/* EDX is the destination, so EAX (which MUL also writes) is
				 * the register that needs protecting */
				if (!saved_eax) {
					saved_eax = TRUE;
					x86_push_reg (code, X86_EAX);
				}
			} else if (ins->dreg != X86_EAX) {
				/* MUL clobbers EDX with the high half of the product */
				saved_edx = TRUE;
				x86_push_reg (code, X86_EDX);
			}
			x86_mul_reg (code, non_eax_reg, FALSE);
			/* save before the check since pop and mov don't change the flags */
			if (ins->dreg != X86_EAX)
				x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
			if (saved_edx)
				x86_pop_reg (code, X86_EDX);
			if (saved_eax)
				x86_pop_reg (code, X86_EAX);
			/* unsigned MUL sets OF (and CF) iff the high half is non-zero,
			 * i.e. the product overflowed 32 bits */
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		}
2566                 case OP_ICONST:
2567                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2568                         break;
2569                 case OP_AOTCONST:
2570                         g_assert_not_reached ();
2571                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2572                         x86_mov_reg_imm (code, ins->dreg, 0);
2573                         break;
2574                 case OP_LOAD_GOTADDR:
2575                         x86_call_imm (code, 0);
2576                         /* 
2577                          * The patch needs to point to the pop, since the GOT offset needs 
2578                          * to be added to that address.
2579                          */
2580                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2581                         x86_pop_reg (code, ins->dreg);
2582                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2583                         break;
2584                 case OP_GOT_ENTRY:
2585                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2586                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2587                         break;
2588                 case OP_X86_PUSH_GOT_ENTRY:
2589                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2590                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2591                         break;
2592                 case CEE_CONV_I4:
2593                 case OP_MOVE:
2594                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2595                         break;
2596                 case CEE_CONV_U4:
2597                         g_assert_not_reached ();
		case OP_JMP: {
			/*
			 * Note: this 'frame destruction' logic is useful for tail calls, too.
			 * Keep in sync with the code in emit_epilog.
			 */
			int pos = 0;

			/* FIXME: no tracing support... */
			if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
				code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
			/* reset offset to make max_len work */
			offset = code - cfg->native_code;

			/* an LMF frame would need unwinding code this opcode doesn't emit */
			g_assert (!cfg->method->save_lmf);

			code = emit_load_volatile_arguments (cfg, code);

			/* point ESP at the callee-saved register save area (pos counts 4
			 * bytes below EBP per saved register) so the pops below restore
			 * them */
			if (cfg->used_int_regs & (1 << X86_EBX))
				pos -= 4;
			if (cfg->used_int_regs & (1 << X86_EDI))
				pos -= 4;
			if (cfg->used_int_regs & (1 << X86_ESI))
				pos -= 4;
			if (pos)
				x86_lea_membase (code, X86_ESP, X86_EBP, pos);
	
			/* pop order assumes the prolog pushed EBX, EDI, ESI in that
			 * order -- NOTE(review): confirm against emit_prolog */
			if (cfg->used_int_regs & (1 << X86_ESI))
				x86_pop_reg (code, X86_ESI);
			if (cfg->used_int_regs & (1 << X86_EDI))
				x86_pop_reg (code, X86_EDI);
			if (cfg->used_int_regs & (1 << X86_EBX))
				x86_pop_reg (code, X86_EBX);
	
			/* restore ESP/EBP */
			x86_leave (code);
			offset = code - cfg->native_code;
			mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
			/* the real target is written when the patch is applied */
			x86_jump32 (code, 0);
			break;
		}
2638                 case OP_CHECK_THIS:
2639                         /* ensure ins->sreg1 is not NULL
2640                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2641                          * cmp DWORD PTR [eax], 0
2642                          */
2643                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2644                         break;
		case OP_ARGLIST: {
			/* store the address EBP+sig_cookie (presumably the varargs
			 * signature cookie slot -- confirm against the calling-convention
			 * code) into the memory sreg1 points at, using a scratch register
			 * guaranteed to differ from sreg1 */
			int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
			x86_push_reg (code, hreg);
			x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
			x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
			x86_pop_reg (code, hreg);
			break;
		}
		case OP_FCALL:
		case OP_LCALL:
		case OP_VCALL:
		case OP_VOIDCALL:
		case CEE_CALL:
			call = (MonoCallInst*)ins;
			/* direct call: patched against the method when we have one,
			 * otherwise against the absolute native address */
			if (ins->flags & MONO_INST_HAS_METHOD)
				code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
			/* stdcall callees clean up their own arguments; for everything
			 * else the caller removes them here */
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				/* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
				 * bytes to pop, we want to use pops. GCC does this (note it won't happen
				 * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
				 * smart enough to do that optimization yet
				 *
				 * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
				 * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
				 * speedup (most likely from locality benefits). People with other processors
				 * should check on theirs to see what happens.
				 */
				if (call->stack_usage == 4) {
					/* we want to use registers that won't get used soon, so use
					 * ecx, as eax will get allocated first. edx is used by long calls,
					 * so we can't use that.
					 */
					
					x86_pop_reg (code, X86_ECX);
				} else {
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
				}
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
2687                 case OP_FCALL_REG:
2688                 case OP_LCALL_REG:
2689                 case OP_VCALL_REG:
2690                 case OP_VOIDCALL_REG:
2691                 case OP_CALL_REG:
2692                         call = (MonoCallInst*)ins;
2693                         x86_call_reg (code, ins->sreg1);
2694                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2695                                 if (call->stack_usage == 4)
2696                                         x86_pop_reg (code, X86_ECX);
2697                                 else
2698                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2699                         }
2700                         code = emit_move_return_value (cfg, ins, code);
2701                         break;
2702                 case OP_FCALL_MEMBASE:
2703                 case OP_LCALL_MEMBASE:
2704                 case OP_VCALL_MEMBASE:
2705                 case OP_VOIDCALL_MEMBASE:
2706                 case OP_CALL_MEMBASE:
2707                         call = (MonoCallInst*)ins;
2708                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2709                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2710                                 if (call->stack_usage == 4)
2711                                         x86_pop_reg (code, X86_ECX);
2712                                 else
2713                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2714                         }
2715                         code = emit_move_return_value (cfg, ins, code);
2716                         break;
2717                 case OP_OUTARG:
2718                 case OP_X86_PUSH:
2719                         x86_push_reg (code, ins->sreg1);
2720                         break;
2721                 case OP_X86_PUSH_IMM:
2722                         x86_push_imm (code, ins->inst_imm);
2723                         break;
2724                 case OP_X86_PUSH_MEMBASE:
2725                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2726                         break;
2727                 case OP_X86_PUSH_OBJ: 
                        /* Push a value type of inst_imm bytes: reserve the space, then
                         * copy it from [inst_basereg + inst_offset] with rep movsd.
                         * EDI/ESI/ECX are saved on the stack around the copy, hence the
                         * +12 when computing the destination address. */
2728                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2729                         x86_push_reg (code, X86_EDI);
2730                         x86_push_reg (code, X86_ESI);
2731                         x86_push_reg (code, X86_ECX);
2732                         if (ins->inst_offset)
2733                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2734                         else
2735                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2736                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
                        /* NOTE(review): copies inst_imm >> 2 dwords, so any remainder of
                         * inst_imm modulo 4 is not copied — presumably value type sizes
                         * are 4-byte aligned here; confirm against the caller */
2737                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2738                         x86_cld (code);
2739                         x86_prefix (code, X86_REP_PREFIX);
2740                         x86_movsd (code);
2741                         x86_pop_reg (code, X86_ECX);
2742                         x86_pop_reg (code, X86_ESI);
2743                         x86_pop_reg (code, X86_EDI);
2744                         break;
2745                 case OP_X86_LEA:
2746                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2747                         break;
2748                 case OP_X86_LEA_MEMBASE:
2749                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2750                         break;
2751                 case OP_X86_XCHG:
2752                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2753                         break;
2754                 case OP_LOCALLOC:
                        /* Round the requested size up to the localloc alignment,
                         * grow the stack, and return the new ESP in dreg. */
2755                         /* keep alignment */
2756                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2757                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2758                         code = mono_emit_stack_alloc (code, ins);
2759                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2760                         break;
2761                 case CEE_RET:
2762                         x86_ret (code);
2763                         break;
2764                 case OP_THROW: {
                        /* Pass the exception object on the stack to the throw trampoline */
2765                         x86_push_reg (code, ins->sreg1);
2766                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2767                                                           (gpointer)"mono_arch_throw_exception");
2768                         break;
2769                 }
2770                 case OP_RETHROW: {
2771                         x86_push_reg (code, ins->sreg1);
2772                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2773                                                           (gpointer)"mono_arch_rethrow_exception");
2774                         break;
2775                 }
2776                 case OP_CALL_HANDLER: 
                        /* Invoke a finally/filter handler via a patched call so it
                         * returns here.  On Apple the stack must stay 16-byte aligned
                         * across the call (12 bytes + the 4-byte return address). */
2777                         /* Align stack */
2778 #ifdef __APPLE__
2779                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2780 #endif
2781                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2782                         x86_call_imm (code, 0);
2783 #ifdef __APPLE__
2784                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2785 #endif
2786                         break;
2787                 case OP_LABEL:
                        /* Record the native offset of this label for later branch patching */
2788                         ins->inst_c0 = code - cfg->native_code;
2789                         break;
2790                 case OP_BR:
2791                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2792                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2793                         //break;
2794                         if (ins->flags & MONO_INST_BRLABEL) {
2795                                 if (ins->inst_i0->inst_c0) {
                                        /* Backward branch: target already emitted, jump straight to it */
2796                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2797                                 } else {
                                        /* Forward branch: emit a placeholder jump to patch later;
                                         * use the short form when the estimated distance fits in 8 bits */
2798                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2799                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2800                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2801                                                 x86_jump8 (code, 0);
2802                                         else 
2803                                                 x86_jump32 (code, 0);
2804                                 }
2805                         } else {
2806                                 if (ins->inst_target_bb->native_offset) {
2807                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2808                                 } else {
2809                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2810                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2811                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2812                                                 x86_jump8 (code, 0);
2813                                         else 
2814                                                 x86_jump32 (code, 0);
2815                                 } 
2816                         }
2817                         break;
2818                 case OP_BR_REG:
2819                         x86_jump_reg (code, ins->sreg1);
2820                         break;
2821                 case OP_CEQ:
2822                 case OP_CLT:
2823                 case OP_CLT_UN:
2824                 case OP_CGT:
2825                 case OP_CGT_UN:
2826                 case OP_CNE:
                        /* Materialize a comparison result: setcc into the low byte of
                         * dreg, then zero-extend it to 32 bits. */
2827                         x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2828                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2829                         break;
2830                 case OP_COND_EXC_EQ:
2831                 case OP_COND_EXC_NE_UN:
2832                 case OP_COND_EXC_LT:
2833                 case OP_COND_EXC_LT_UN:
2834                 case OP_COND_EXC_GT:
2835                 case OP_COND_EXC_GT_UN:
2836                 case OP_COND_EXC_GE:
2837                 case OP_COND_EXC_GE_UN:
2838                 case OP_COND_EXC_LE:
2839                 case OP_COND_EXC_LE_UN:
                        /* Conditionally throw the runtime exception named by inst_p1 */
2840                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
2841                         break;
2842                 case OP_COND_EXC_OV:
2843                 case OP_COND_EXC_NO:
2844                 case OP_COND_EXC_C:
2845                 case OP_COND_EXC_NC:
                        /* Overflow/carry variants index branch_cc_table directly by opcode */
2846                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2847                         break;
2848                 case CEE_BEQ:
2849                 case CEE_BNE_UN:
2850                 case CEE_BLT:
2851                 case CEE_BLT_UN:
2852                 case CEE_BGT:
2853                 case CEE_BGT_UN:
2854                 case CEE_BGE:
2855                 case CEE_BGE_UN:
2856                 case CEE_BLE:
2857                 case CEE_BLE_UN:
2858                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2859                         break;
2860
2861                 /* floating point opcodes */
2862                 case OP_R8CONST: {
2863                         double d = *(double *)ins->inst_p0;
2864
                        /* Use the dedicated x87 instructions for the common constants.
                         * fldz is only valid for +0.0 — the signbit check keeps -0.0
                         * on the generic path. */
2865                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2866                                 x86_fldz (code);
2867                         } else if (d == 1.0) {
2868                                 x86_fld1 (code);
2869                         } else {
2870                                 if (cfg->compile_aot) {
                                        /* AOT code can't reference runtime memory: push the raw
                                         * bit pattern and load it back from the stack */
2871                                         guint32 *val = (guint32*)&d;
2872                                         x86_push_imm (code, val [1]);
2873                                         x86_push_imm (code, val [0]);
2874                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2875                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2876                                 }
2877                                 else {
                                        /* JIT: load from a patched absolute address */
2878                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2879                                         x86_fld (code, NULL, TRUE);
2880                                 }
2881                         }
2882                         break;
2883                 }
2884                 case OP_R4CONST: {
                        /* Same strategy as OP_R8CONST, single precision */
2885                         float f = *(float *)ins->inst_p0;
2886
2887                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2888                                 x86_fldz (code);
2889                         } else if (f == 1.0) {
2890                                 x86_fld1 (code);
2891                         } else {
2892                                 if (cfg->compile_aot) {
2893                                         guint32 val = *(guint32*)&f;
2894                                         x86_push_imm (code, val);
2895                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2896                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2897                                 }
2898                                 else {
2899                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2900                                         x86_fld (code, NULL, FALSE);
2901                                 }
2902                         }
2903                         break;
2904                 }
2905                 case OP_STORER8_MEMBASE_REG:
2906                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2907                         break;
2908                 case OP_LOADR8_SPILL_MEMBASE:
                        /* Reload a spilled fp value below the current stack top */
2909                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2910                         x86_fxch (code, 1);
2911                         break;
2912                 case OP_LOADR8_MEMBASE:
2913                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2914                         break;
2915                 case OP_STORER4_MEMBASE_REG:
2916                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2917                         break;
2918                 case OP_LOADR4_MEMBASE:
2919                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2920                         break;
2921                 case CEE_CONV_R4: /* FIXME: change precision */
2922                 case CEE_CONV_R8:
                        /* int -> float: fild has no register operand, go through the stack */
2923                         x86_push_reg (code, ins->sreg1);
2924                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2925                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2926                         break;
2927                 case OP_X86_FP_LOAD_I8:
2928                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2929                         break;
2930                 case OP_X86_FP_LOAD_I4:
2931                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2932                         break;
2933                 case OP_FCONV_TO_I1:
2934                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2935                         break;
2936                 case OP_FCONV_TO_U1:
2937                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2938                         break;
2939                 case OP_FCONV_TO_I2:
2940                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2941                         break;
2942                 case OP_FCONV_TO_U2:
2943                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2944                         break;
2945                 case OP_FCONV_TO_I4:
2946                 case OP_FCONV_TO_I:
2947                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2948                         break;
2949                 case OP_FCONV_TO_I8:
                        /* Temporarily force the FPU rounding mode to truncate
                         * (RC bits 0xc00 of the control word), fistp the 64-bit
                         * result, then restore the saved control word. */
2950                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2951                         x86_fnstcw_membase(code, X86_ESP, 0);
2952                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2953                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2954                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2955                         x86_fldcw_membase (code, X86_ESP, 2);
2956                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2957                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
                        /* low word -> dreg, high word -> backend.reg3 */
2958                         x86_pop_reg (code, ins->dreg);
2959                         x86_pop_reg (code, ins->backend.reg3);
2960                         x86_fldcw_membase (code, X86_ESP, 0);
2961                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2962                         break;
2963                 case OP_LCONV_TO_R_UN: { 
                        /* mn is 2**64 encoded as an 80-bit x87 extended-precision value
                         * (little endian: significand 0x8000000000000000, sign/exponent
                         * word 0x403f). */
2964                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2965                         guint8 *br;
2966
2967                         /* load 64bit integer to FP stack */
2968                         x86_push_imm (code, 0);
2969                         x86_push_reg (code, ins->sreg2);
2970                         x86_push_reg (code, ins->sreg1);
2971                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2972                         /* store as 80bit FP value */
2973                         x86_fst80_membase (code, X86_ESP, 0);
2974                         
2975                         /* test if lreg is negative */
2976                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2977                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2978         
2979                         /* fild read the value as signed: add the correction constant mn (2**64) */
2980                         x86_fld80_mem (code, mn);
2981                         x86_fld80_membase (code, X86_ESP, 0);
2982                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2983                         x86_fst80_membase (code, X86_ESP, 0);
2984
2985                         x86_patch (br, code);
2986
2987                         x86_fld80_membase (code, X86_ESP, 0);
2988                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2989
2990                         break;
2991                 }
2992                 case OP_LCONV_TO_OVF_I: {
2993                         guint8 *br [3], *label [1];
2994                         MonoInst *tins;
2995
2996                         /* 
2997                          * Valid ints (high:low): 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2998                          */
2999                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3000
3001                         /* If the low word top bit is set, see if we are negative */
3002                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3003                         /* We are not negative (no top bit set), check for our top word to be zero */
3004                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3005                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3006                         label [0] = code;
3007
3008                         /* throw exception */
3009                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
3010                         if (tins) {
                                /* branch to an existing handler block if the optimizer found one */
3011                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
3012                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
3013                                         x86_jump8 (code, 0);
3014                                 else
3015                                         x86_jump32 (code, 0);
3016                         } else {
3017                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3018                                 x86_jump32 (code, 0);
3019                         }
3020         
3021         
3022                         x86_patch (br [0], code);
3023                         /* our top bit is set, check that top word is 0xffffffff */
3024                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3025                 
                        /* br [1] (positive, top word zero) lands here: ZF is still set
                         * from its test above, so the NE branch below falls through */
3026                         x86_patch (br [1], code);
3027                         /* nope, emit exception */
3028                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3029                         x86_patch (br [2], label [0]);
3030
3031                         if (ins->dreg != ins->sreg1)
3032                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3033                         break;
3034                 }
3035                 case OP_FADD:
3036                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3037                         break;
3038                 case OP_FSUB:
3039                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3040                         break;          
3041                 case OP_FMUL:
3042                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3043                         break;          
3044                 case OP_FDIV:
3045                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3046                         break;          
3047                 case OP_FNEG:
3048                         x86_fchs (code);
3049                         break;          
3050                 case OP_SIN:
                        /* NOTE(review): the trailing fldz/fadd after fsin/fcos/fptan/fpatan
                         * looks like a trick to round the 80-bit result — confirm against
                         * the other mono backends */
3051                         x86_fsin (code);
3052                         x86_fldz (code);
3053                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3054                         break;          
3055                 case OP_COS:
3056                         x86_fcos (code);
3057                         x86_fldz (code);
3058                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3059                         break;          
3060                 case OP_ABS:
3061                         x86_fabs (code);
3062                         break;          
3063                 case OP_TAN: {
3064                         /* 
3065                          * it really doesn't make sense to inline all this code,
3066                          * it's here just to show that things may not be as simple 
3067                          * as they appear.
3068                          */
3069                         guchar *check_pos, *end_tan, *pop_jump;
                        /* fptan requires |x| < 2**63 and reports an out-of-range argument
                         * by setting the C2 status flag (leaving the operand unchanged);
                         * the slow path below reduces the argument with fprem1 against
                         * 2*pi (pi fadd'ed to itself) and retries. */
3070                         x86_push_reg (code, X86_EAX);
3071                         x86_fptan (code);
3072                         x86_fnstsw (code);
3073                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3074                         check_pos = code;
3075                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3076                         x86_fstp (code, 0); /* pop the 1.0 */
3077                         end_tan = code;
3078                         x86_jump8 (code, 0);
3079                         x86_fldpi (code);
3080                         x86_fp_op (code, X86_FADD, 0);
3081                         x86_fxch (code, 1);
3082                         x86_fprem1 (code);
3083                         x86_fstsw (code);
3084                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3085                         pop_jump = code;
                        /* NOTE(review): fprem1 with C2 still set means a partial
                         * remainder, but there is no backward branch here to iterate
                         * the reduction — verify this is intentional */
3086                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3087                         x86_fstp (code, 1);
3088                         x86_fptan (code);
3089                         x86_patch (pop_jump, code);
3090                         x86_fstp (code, 0); /* pop the 1.0 */
3091                         x86_patch (check_pos, code);
3092                         x86_patch (end_tan, code);
3093                         x86_fldz (code);
3094                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3095                         x86_pop_reg (code, X86_EAX);
3096                         break;
3097                 }
3098                 case OP_ATAN:
                        /* fpatan computes arctan(st1/st0); with 1.0 pushed on top this
                         * yields arctan(x) */
3099                         x86_fld1 (code);
3100                         x86_fpatan (code);
3101                         x86_fldz (code);
3102                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3103                         break;          
3104                 case OP_SQRT:
3105                         x86_fsqrt (code);
3106                         break;          
3107                 case OP_X86_FPOP:
3108                         x86_fstp (code, 0);
3109                         break;          
3110                 case OP_FREM: {
3111                         guint8 *l1, *l2;
3112
3113                         x86_push_reg (code, X86_EAX);
3114                         /* we need to exchange ST(0) with ST(1) */
3115                         x86_fxch (code, 1);
3116
3117                         /* this requires a loop, because fprem sometimes 
3118                          * returns a partial remainder */
3119                         l1 = code;
3120                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3121                         /* x86_fprem1 (code); */
3122                         x86_fprem (code);
3123                         x86_fnstsw (code);
3124                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
                        /* the short branch is 2 bytes; l2 marks its end so l1 - l2
                         * is the correct backwards displacement for the loop */
3125                         l2 = code + 2;
3126                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3127
3128                         /* pop result */
3129                         x86_fstp (code, 1);
3130
3131                         x86_pop_reg (code, X86_EAX);
3132                         break;
3133                 }
3134                 case OP_FCOMPARE:
3135                         if (cfg->opt & MONO_OPT_FCMOV) {
                                /* fcomip (P6+) sets EFLAGS directly; pop the other operand */
3136                                 x86_fcomip (code, 1);
3137                                 x86_fstp (code, 0);
3138                                 break;
3139                         }
                        /* Legacy path: fnstsw into AX, keep only the condition-code bits */
3140                         /* this overwrites EAX */
3141                         EMIT_FPCOMPARE(code);
3142                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3143                         break;
3144                 case OP_FCEQ:
3145                         if (cfg->opt & MONO_OPT_FCMOV) {
3146                                 /* zeroing the register at the start results in 
3147                                  * shorter and faster code (we can also remove the widening op)
3148                                  */
3149                                 guchar *unordered_check;
3150                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3151                                 x86_fcomip (code, 1);
3152                                 x86_fstp (code, 0);
                                /* PF after fcomip means unordered (NaN): leave dreg = 0 */
3153                                 unordered_check = code;
3154                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3155                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3156                                 x86_patch (unordered_check, code);
3157                                 break;
3158                         }
                        /* Legacy path clobbers EAX, so preserve it unless it is the dest */
3159                         if (ins->dreg != X86_EAX) 
3160                                 x86_push_reg (code, X86_EAX);
3161
3162                         EMIT_FPCOMPARE(code);
3163                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3164                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3165                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3166                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3167
3168                         if (ins->dreg != X86_EAX) 
3169                                 x86_pop_reg (code, X86_EAX);
3170                         break;
3171                 case OP_FCLT:
3172                 case OP_FCLT_UN:
3173                         if (cfg->opt & MONO_OPT_FCMOV) {
3174                                 /* zeroing the register at the start results in 
3175                                  * shorter and faster code (we can also remove the widening op)
3176                                  */
3177                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3178                                 x86_fcomip (code, 1);
3179                                 x86_fstp (code, 0);
3180                                 if (ins->opcode == OP_FCLT_UN) {
                                        /* the _UN variant must yield 1 on unordered (NaN) input */
3181                                         guchar *unordered_check = code;
3182                                         guchar *jump_to_end;
3183                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3184                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3185                                         jump_to_end = code;
3186                                         x86_jump8 (code, 0);
3187                                         x86_patch (unordered_check, code);
3188                                         x86_inc_reg (code, ins->dreg);
3189                                         x86_patch (jump_to_end, code);
3190                                 } else {
3191                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3192                                 }
3193                                 break;
3194                         }
3195                         if (ins->dreg != X86_EAX) 
3196                                 x86_push_reg (code, X86_EAX);
3197
3198                         EMIT_FPCOMPARE(code);
3199                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3200                         if (ins->opcode == OP_FCLT_UN) {
                                /* also accept the all-bits-set (unordered) status pattern */
3201                                 guchar *is_not_zero_check, *end_jump;
3202                                 is_not_zero_check = code;
3203                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3204                                 end_jump = code;
3205                                 x86_jump8 (code, 0);
3206                                 x86_patch (is_not_zero_check, code);
3207                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3208
3209                                 x86_patch (end_jump, code);
3210                         }
3211                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3212                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3213
3214                         if (ins->dreg != X86_EAX) 
3215                                 x86_pop_reg (code, X86_EAX);
3216                         break;
3217                 case OP_FCGT:
3218                 case OP_FCGT_UN:
3219                         if (cfg->opt & MONO_OPT_FCMOV) {
3220                                 /* zeroing the register at the start results in 
3221                                  * shorter and faster code (we can also remove the widening op)
3222                                  */
3223                                 guchar *unordered_check;
3224                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3225                                 x86_fcomip (code, 1);
3226                                 x86_fstp (code, 0);
3227                                 if (ins->opcode == OP_FCGT) {
                                        /* ordered compare: NaN must yield 0, skip the setcc on PF */
3228                                         unordered_check = code;
3229                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3230                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3231                                         x86_patch (unordered_check, code);
3232                                 } else {
3233                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3234                                 }
3235                                 break;
3236                         }
3237                         if (ins->dreg != X86_EAX) 
3238                                 x86_push_reg (code, X86_EAX);
3239
3240                         EMIT_FPCOMPARE(code);
3241                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3242                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3243                         if (ins->opcode == OP_FCGT_UN) {
3244                                 guchar *is_not_zero_check, *end_jump;
3245                                 is_not_zero_check = code;
3246                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3247                                 end_jump = code;
3248                                 x86_jump8 (code, 0);
3249                                 x86_patch (is_not_zero_check, code);
3250                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3251         
3252                                 x86_patch (end_jump, code);
3253                         }
3254                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3255                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3256
3257                         if (ins->dreg != X86_EAX) 
3258                                 x86_pop_reg (code, X86_EAX);
3259                         break;
3260                 case OP_FBEQ:
3261                         if (cfg->opt & MONO_OPT_FCMOV) {
                                /* beq must not be taken on unordered (NaN): PF set skips the branch */
3262                                 guchar *jump = code;
3263                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3264                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3265                                 x86_patch (jump, code);
3266                                 break;
3267                         }
3268                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3269                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3270                         break;
3271                 case OP_FBNE_UN:
3272                         /* Branch if C013 != 100 */
3273                         if (cfg->opt & MONO_OPT_FCMOV) {
3274                                 /* branch if !ZF or (PF|CF) */
3275                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3276                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3277                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3278                                 break;
3279                         }
3280                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3281                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3282                         break;
3283                 case OP_FBLT:
3284                         if (cfg->opt & MONO_OPT_FCMOV) {
3285                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3286                                 break;
3287                         }
3288                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3289                         break;
3290                 case OP_FBLT_UN:
3291                         if (cfg->opt & MONO_OPT_FCMOV) {
                                /* unordered (PF) also takes the branch for the _UN variant */
3292                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3293                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3294                                 break;
3295                         }
                        /* NOTE(review): this test is always true inside this case;
                         * it just mirrors the OP_FCLT_UN code above */
3296                         if (ins->opcode == OP_FBLT_UN) {
3297                                 guchar *is_not_zero_check, *end_jump;
3298                                 is_not_zero_check = code;
3299                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3300                                 end_jump = code;
3301                                 x86_jump8 (code, 0);
3302                                 x86_patch (is_not_zero_check, code);
3303                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3304
3305                                 x86_patch (end_jump, code);
3306                         }
3307                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3308                         break;
3309                 case OP_FBGT:
3310                 case OP_FBGT_UN:
3311                         if (cfg->opt & MONO_OPT_FCMOV) {
3312                                 if (ins->opcode == OP_FBGT) {
3313                                         guchar *br1;
3314
3315                                         /* skip branch if C1=1 */
3316                                         br1 = code;
3317                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3318                                         /* branch if (C0 | C3) = 1 */
3319                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3320                                         x86_patch (br1, code);
3321                                 } else {
3322                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3323                                 }
3324                                 break;
3325                         }
3326                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3327                         if (ins->opcode == OP_FBGT_UN) {
3328                                 guchar *is_not_zero_check, *end_jump;
3329                                 is_not_zero_check = code;
3330                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3331                                 end_jump = code;
3332                                 x86_jump8 (code, 0);
3333                                 x86_patch (is_not_zero_check, code);
3334                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3335
3336                                 x86_patch (end_jump, code);
3337                         }
3338                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3339                         break;
3340                 case OP_FBGE:
3341                         /* Branch if C013 == 100 or 001 */
3342                         if (cfg->opt & MONO_OPT_FCMOV) {
3343                                 guchar *br1;
3344
3345                                 /* skip branch if C1=1 */
3346                                 br1 = code;
3347                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3348                                 /* branch if (C0 | C3) = 1 */
3349                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3350                                 x86_patch (br1, code);
3351                                 break;
3352                         }
3353                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3354                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3355                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3356                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3357                         break;
3358                 case OP_FBGE_UN:
3359                         /* Branch if C013 == 000 */
3360                         if (cfg->opt & MONO_OPT_FCMOV) {
3361                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3362                                 break;
3363                         }
3364                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3365                         break;
3366                 case OP_FBLE:
3367                         /* Branch if C013=000 or 100 */
3368                         if (cfg->opt & MONO_OPT_FCMOV) {
3369                                 guchar *br1;
3370
3371                                 /* skip branch if C1=1 */
3372                                 br1 = code;
3373                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3374                                 /* branch if C0=0 */
3375                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3376                                 x86_patch (br1, code);
3377                                 break;
3378                         }
3379                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3380                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3381                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3382                         break;
3383                 case OP_FBLE_UN:
3384                         /* Branch if C013 != 001 */
3385                         if (cfg->opt & MONO_OPT_FCMOV) {
3386                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3387                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3388                                 break;
3389                         }
3390                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3391                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3392                         break;
3393                 case OP_CKFINITE: {
3394                         x86_push_reg (code, X86_EAX);
3395                         x86_fxam (code);
3396                         x86_fnstsw (code);
3397                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3398                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3399                         x86_pop_reg (code, X86_EAX);
3400                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3401                         break;
3402                 }
3403                 case OP_TLS_GET: {
3404                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3405                         break;
3406                 }
3407                 case OP_MEMORY_BARRIER: {
3408                         /* Not needed on x86 */
3409                         break;
3410                 }
3411                 case OP_ATOMIC_ADD_I4: {
3412                         int dreg = ins->dreg;
3413
3414                         if (dreg == ins->inst_basereg) {
3415                                 x86_push_reg (code, ins->sreg2);
3416                                 dreg = ins->sreg2;
3417                         } 
3418                         
3419                         if (dreg != ins->sreg2)
3420                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3421
3422                         x86_prefix (code, X86_LOCK_PREFIX);
3423                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3424
3425                         if (dreg != ins->dreg) {
3426                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3427                                 x86_pop_reg (code, dreg);
3428                         }
3429
3430                         break;
3431                 }
3432                 case OP_ATOMIC_ADD_NEW_I4: {
3433                         int dreg = ins->dreg;
3434
3435                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3436                         if (ins->sreg2 == dreg) {
3437                                 if (dreg == X86_EBX) {
3438                                         dreg = X86_EDI;
3439                                         if (ins->inst_basereg == X86_EDI)
3440                                                 dreg = X86_ESI;
3441                                 } else {
3442                                         dreg = X86_EBX;
3443                                         if (ins->inst_basereg == X86_EBX)
3444                                                 dreg = X86_EDI;
3445                                 }
3446                         } else if (ins->inst_basereg == dreg) {
3447                                 if (dreg == X86_EBX) {
3448                                         dreg = X86_EDI;
3449                                         if (ins->sreg2 == X86_EDI)
3450                                                 dreg = X86_ESI;
3451                                 } else {
3452                                         dreg = X86_EBX;
3453                                         if (ins->sreg2 == X86_EBX)
3454                                                 dreg = X86_EDI;
3455                                 }
3456                         }
3457
3458                         if (dreg != ins->dreg) {
3459                                 x86_push_reg (code, dreg);
3460                         }
3461
3462                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3463                         x86_prefix (code, X86_LOCK_PREFIX);
3464                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3465                         /* dreg contains the old value, add with sreg2 value */
3466                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3467                         
3468                         if (ins->dreg != dreg) {
3469                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3470                                 x86_pop_reg (code, dreg);
3471                         }
3472
3473                         break;
3474                 }
3475                 case OP_ATOMIC_EXCHANGE_I4: {
3476                         guchar *br[2];
3477                         int sreg2 = ins->sreg2;
3478                         int breg = ins->inst_basereg;
3479
3480                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3481                          * hack to overcome limits in x86 reg allocator 
3482                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3483                          */
3484                         if (ins->dreg != X86_EAX)
3485                                 x86_push_reg (code, X86_EAX);
3486                         
3487                         /* We need the EAX reg for the cmpxchg */
3488                         if (ins->sreg2 == X86_EAX) {
3489                                 x86_push_reg (code, X86_EDX);
3490                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3491                                 sreg2 = X86_EDX;
3492                         }
3493
3494                         if (breg == X86_EAX) {
3495                                 x86_push_reg (code, X86_ESI);
3496                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3497                                 breg = X86_ESI;
3498                         }
3499
3500                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3501
3502                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3503                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3504                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3505                         x86_patch (br [1], br [0]);
3506
3507                         if (breg != ins->inst_basereg)
3508                                 x86_pop_reg (code, X86_ESI);
3509
3510                         if (ins->dreg != X86_EAX) {
3511                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3512                                 x86_pop_reg (code, X86_EAX);
3513                         }
3514
3515                         if (ins->sreg2 != sreg2)
3516                                 x86_pop_reg (code, X86_EDX);
3517
3518                         break;
3519                 }
3520                 default:
3521                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3522                         g_assert_not_reached ();
3523                 }
3524
3525                 if ((code - cfg->native_code - offset) > max_len) {
3526                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3527                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3528                         g_assert_not_reached ();
3529                 }
3530                
3531                 cpos += max_len;
3532
3533                 last_ins = ins;
3534                 last_offset = offset;
3535                 
3536                 ins = ins->next;
3537         }
3538
3539         cfg->code_len = code - cfg->native_code;
3540 }
3541
void
mono_arch_register_lowlevel_calls (void)
{
	/* No architecture-specific low-level calls need registering on x86. */
}
3546
3547 void
3548 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3549 {
3550         MonoJumpInfo *patch_info;
3551         gboolean compile_aot = !run_cctors;
3552
3553         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3554                 unsigned char *ip = patch_info->ip.i + code;
3555                 const unsigned char *target;
3556
3557                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3558
3559                 if (compile_aot) {
3560                         switch (patch_info->type) {
3561                         case MONO_PATCH_INFO_BB:
3562                         case MONO_PATCH_INFO_LABEL:
3563                                 break;
3564                         default:
3565                                 /* No need to patch these */
3566                                 continue;
3567                         }
3568                 }
3569
3570                 switch (patch_info->type) {
3571                 case MONO_PATCH_INFO_IP:
3572                         *((gconstpointer *)(ip)) = target;
3573                         break;
3574                 case MONO_PATCH_INFO_CLASS_INIT: {
3575                         guint8 *code = ip;
3576                         /* Might already been changed to a nop */
3577                         x86_call_code (code, 0);
3578                         x86_patch (ip, target);
3579                         break;
3580                 }
3581                 case MONO_PATCH_INFO_ABS:
3582                 case MONO_PATCH_INFO_METHOD:
3583                 case MONO_PATCH_INFO_METHOD_JUMP:
3584                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3585                 case MONO_PATCH_INFO_BB:
3586                 case MONO_PATCH_INFO_LABEL:
3587                         x86_patch (ip, target);
3588                         break;
3589                 case MONO_PATCH_INFO_NONE:
3590                         break;
3591                 default: {
3592                         guint32 offset = mono_arch_get_patch_offset (ip);
3593                         *((gconstpointer *)(ip + offset)) = target;
3594                         break;
3595                 }
3596                 }
3597         }
3598 }
3599
3600 guint8 *
3601 mono_arch_emit_prolog (MonoCompile *cfg)
3602 {
3603         MonoMethod *method = cfg->method;
3604         MonoBasicBlock *bb;
3605         MonoMethodSignature *sig;
3606         MonoInst *inst;
3607         int alloc_size, pos, max_offset, i;
3608         guint8 *code;
3609
3610         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3611
3612         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3613                 cfg->code_size += 512;
3614
3615         code = cfg->native_code = g_malloc (cfg->code_size);
3616
3617         x86_push_reg (code, X86_EBP);
3618         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3619
3620         alloc_size = cfg->stack_offset;
3621         pos = 0;
3622
3623         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3624                 /* Might need to attach the thread to the JIT */
3625                 if (lmf_tls_offset != -1) {
3626                         guint8 *buf;
3627
3628                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3629                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3630                         buf = code;
3631                         x86_branch8 (code, X86_CC_NE, 0, 0);
3632                         x86_push_imm (code, cfg->domain);
3633                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3634                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3635                         x86_patch (buf, code);
3636 #ifdef PLATFORM_WIN32
3637                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3638                         /* FIXME: Add a separate key for LMF to avoid this */
3639                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3640 #endif
3641                 } else {
3642                         g_assert (!cfg->compile_aot);
3643                         x86_push_imm (code, cfg->domain);
3644                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3645                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3646                 }
3647         }
3648
3649         if (method->save_lmf) {
3650                 pos += sizeof (MonoLMF);
3651
3652                 /* save the current IP */
3653                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3654                 x86_push_imm_template (code);
3655
3656                 /* save all caller saved regs */
3657                 x86_push_reg (code, X86_EBP);
3658                 x86_push_reg (code, X86_ESI);
3659                 x86_push_reg (code, X86_EDI);
3660                 x86_push_reg (code, X86_EBX);
3661
3662                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3663                         /*
3664                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3665                          * through the mono_lmf_addr TLS variable.
3666                          */
3667                         /* %eax = previous_lmf */
3668                         x86_prefix (code, X86_GS_PREFIX);
3669                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3670                         /* skip esp + method_info + lmf */
3671                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
3672                         /* push previous_lmf */
3673                         x86_push_reg (code, X86_EAX);
3674                         /* new lmf = ESP */
3675                         x86_prefix (code, X86_GS_PREFIX);
3676                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
3677                 } else {
3678                         /* get the address of lmf for the current thread */
3679                         /* 
3680                          * This is performance critical so we try to use some tricks to make
3681                          * it fast.
3682                          */                                                                        
3683
3684                         if (lmf_addr_tls_offset != -1) {
3685                                 /* Load lmf quicky using the GS register */
3686                                 code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
3687 #ifdef PLATFORM_WIN32
3688                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3689                                 /* FIXME: Add a separate key for LMF to avoid this */
3690                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3691 #endif
3692                         } else {
3693                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3694                         }
3695
3696                         /* Skip esp + method info */
3697                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3698
3699                         /* push lmf */
3700                         x86_push_reg (code, X86_EAX); 
3701                         /* push *lfm (previous_lmf) */
3702                         x86_push_membase (code, X86_EAX, 0);
3703                         /* *(lmf) = ESP */
3704                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3705                 }
3706         } else {
3707
3708                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3709                         x86_push_reg (code, X86_EBX);
3710                         pos += 4;
3711                 }
3712
3713                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3714                         x86_push_reg (code, X86_EDI);
3715                         pos += 4;
3716                 }
3717
3718                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3719                         x86_push_reg (code, X86_ESI);
3720                         pos += 4;
3721                 }
3722         }
3723
3724         alloc_size -= pos;
3725
3726 #if __APPLE__
3727         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3728         {
3729                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3730                 if (tot & 4) {
3731                         tot += 4;
3732                         alloc_size += 4;
3733                 }
3734                 if (tot & 8) {
3735                         alloc_size += 8;
3736                 }
3737         }
3738 #endif
3739
3740         if (alloc_size) {
3741                 /* See mono_emit_stack_alloc */
3742 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3743                 guint32 remaining_size = alloc_size;
3744                 while (remaining_size >= 0x1000) {
3745                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3746                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3747                         remaining_size -= 0x1000;
3748                 }
3749                 if (remaining_size)
3750                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3751 #else
3752                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3753 #endif
3754         }
3755
3756 #if __APPLE_
3757         /* check the stack is aligned */
3758         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3759         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3760         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3761         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3762         x86_breakpoint (code);
3763 #endif
3764
3765         /* compute max_offset in order to use short forward jumps */
3766         max_offset = 0;
3767         if (cfg->opt & MONO_OPT_BRANCH) {
3768                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3769                         MonoInst *ins = bb->code;
3770                         bb->max_offset = max_offset;
3771
3772                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3773                                 max_offset += 6;
3774                         /* max alignment for loops */
3775                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3776                                 max_offset += LOOP_ALIGNMENT;
3777
3778                         while (ins) {
3779                                 if (ins->opcode == OP_LABEL)
3780                                         ins->inst_c1 = max_offset;
3781                                 
3782                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3783                                 ins = ins->next;
3784                         }
3785                 }
3786         }
3787
3788         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3789                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3790
3791         /* load arguments allocated to register from the stack */
3792         sig = mono_method_signature (method);
3793         pos = 0;
3794
3795         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3796                 inst = cfg->args [pos];
3797                 if (inst->opcode == OP_REGVAR) {
3798                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3799                         if (cfg->verbose_level > 2)
3800                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3801                 }
3802                 pos++;
3803         }
3804
3805         cfg->code_len = code - cfg->native_code;
3806
3807         return code;
3808 }
3809
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the method epilog: undo the LMF link or pop the saved callee-saved
 * registers, load value-type return values into their registers/fp-stack
 * slots, tear down the ebp frame and emit the ret (with an immediate for
 * stdcall / hidden-return-pointer callees that must pop their arguments).
 * Grows cfg->native_code if there is not enough room left.
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	/* The LMF teardown path below emits considerably more code. */
	if (cfg->method->save_lmf)
		max_epilog_size += 128;

	/* Make sure the buffer can hold the epilog (keep 16 bytes slack). */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with OP_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;
		/* The LMF was pushed at the top of the frame by the prolog. */
		gint32 lmf_offset = -sizeof (MonoLMF);

		if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
			/*
			 * Optimized version which uses the mono_lmf TLS variable instead of indirection
			 * through the mono_lmf_addr TLS variable.
			 */
			/* reg = previous_lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* lmf = previous_lmf */
			x86_prefix (code, X86_GS_PREFIX);
			x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
		} else {
			/* Find a spare register */
			/* edx holds half of a 64-bit return value, so use edi then. */
			switch (sig->ret->type) {
			case MONO_TYPE_I8:
			case MONO_TYPE_U8:
				prev_lmf_reg = X86_EDI;
				cfg->used_int_regs |= (1 << X86_EDI);
				break;
			default:
				prev_lmf_reg = X86_EDX;
				break;
			}

			/* reg = previous_lmf */
			x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);

			/* ecx = lmf */
			x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);

			/* *(lmf) = previous_lmf */
			x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
		}

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* Compute the offset of the first saved register below ebp
		 * (registers were pushed ebx, edi, esi in the prolog). */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		/* Pop in reverse push order. */
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	if (CALLCONV_IS_STDCALL (sig)) {
		/* stdcall: the callee pops its own arguments. */
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		/* Pop the hidden valuetype-return-address argument. */
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3956
3957 void
3958 mono_arch_emit_exceptions (MonoCompile *cfg)
3959 {
3960         MonoJumpInfo *patch_info;
3961         int nthrows, i;
3962         guint8 *code;
3963         MonoClass *exc_classes [16];
3964         guint8 *exc_throw_start [16], *exc_throw_end [16];
3965         guint32 code_size;
3966         int exc_count = 0;
3967
3968         /* Compute needed space */
3969         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3970                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3971                         exc_count++;
3972         }
3973
3974         /* 
3975          * make sure we have enough space for exceptions
3976          * 16 is the size of two push_imm instructions and a call
3977          */
3978         if (cfg->compile_aot)
3979                 code_size = exc_count * 32;
3980         else
3981                 code_size = exc_count * 16;
3982
3983         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
3984                 cfg->code_size *= 2;
3985                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3986                 mono_jit_stats.code_reallocs++;
3987         }
3988
3989         code = cfg->native_code + cfg->code_len;
3990
3991         nthrows = 0;
3992         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3993                 switch (patch_info->type) {
3994                 case MONO_PATCH_INFO_EXC: {
3995                         MonoClass *exc_class;
3996                         guint8 *buf, *buf2;
3997                         guint32 throw_ip;
3998
3999                         x86_patch (patch_info->ip.i + cfg->native_code, code);
4000
4001                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4002                         g_assert (exc_class);
4003                         throw_ip = patch_info->ip.i;
4004
4005                         /* Find a throw sequence for the same exception class */
4006                         for (i = 0; i < nthrows; ++i)
4007                                 if (exc_classes [i] == exc_class)
4008                                         break;
4009                         if (i < nthrows) {
4010                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4011                                 x86_jump_code (code, exc_throw_start [i]);
4012                                 patch_info->type = MONO_PATCH_INFO_NONE;
4013                         }
4014                         else {
4015                                 guint32 size;
4016
4017                                 /* Compute size of code following the push <OFFSET> */
4018                                 size = 5 + 5;
4019
4020                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
4021                                         /* Use the shorter form */
4022                                         buf = buf2 = code;
4023                                         x86_push_imm (code, 0);
4024                                 }
4025                                 else {
4026                                         buf = code;
4027                                         x86_push_imm (code, 0xf0f0f0f0);
4028                                         buf2 = code;
4029                                 }
4030
4031                                 if (nthrows < 16) {
4032                                         exc_classes [nthrows] = exc_class;
4033                                         exc_throw_start [nthrows] = code;
4034                                 }
4035
4036                                 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
4037                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
4038                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4039                                 patch_info->ip.i = code - cfg->native_code;
4040                                 x86_call_code (code, 0);
4041                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
4042                                 while (buf < buf2)
4043                                         x86_nop (buf);
4044
4045                                 if (nthrows < 16) {
4046                                         exc_throw_end [nthrows] = code;
4047                                         nthrows ++;
4048                                 }
4049                         }
4050                         break;
4051                 }
4052                 default:
4053                         /* do nothing */
4054                         break;
4055                 }
4056         }
4057
4058         cfg->code_len = code - cfg->native_code;
4059
4060         g_assert (cfg->code_len < cfg->code_size);
4061 }
4062
/*
 * mono_arch_flush_icache:
 *
 *   Flush the instruction cache for the CODE/SIZE range. On x86 the
 * hardware keeps the instruction cache coherent with data writes, so
 * nothing needs to be done.
 */
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* not needed */
}
4068
/*
 * mono_arch_flush_register_windows:
 *
 *   No-op on x86; this hook exists for architectures which have
 * register windows.
 */
void
mono_arch_flush_register_windows (void)
{
}
4073
4074 /*
4075  * Support for fast access to the thread-local lmf structure using the GS
4076  * segment register on NPTL + kernel 2.6.x.
4077  */
4078
/* TRUE once the TLS offsets below have been looked up. Only set on the
 * non-Windows path of mono_arch_setup_jit_tls_data (); on Windows the
 * keys are deliberately re-read on every call. */
static gboolean tls_offset_inited = FALSE;
4080
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Initialize the cached TLS offsets/keys used for inline TLS access.
 * Setting the MONO_NO_TLS environment variable disables this entirely,
 * leaving all offsets at -1.
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
			/* 
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();
			thread_tls_offset = mono_thread_get_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
			if (thread_tls_offset >= 64)
				thread_tls_offset = -1;
#else
#if MONO_XEN_OPT
			/* Detect whether we are running under the Xen hypervisor */
			optimize_for_xen = access ("/proc/xen", F_OK) == 0;
#endif
			/* tls_offset_inited is only set here, so the Windows branch
			 * above runs on every call by design (see comment there). */
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
			thread_tls_offset = mono_thread_get_tls_offset ();
#endif
		}
	}
}
4115
/*
 * mono_arch_free_jit_tls_data:
 *
 *   Free arch specific data stored in TLS. Nothing to free on x86.
 */
void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
}
4120
4121 void
4122 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4123 {
4124         MonoCallInst *call = (MonoCallInst*)inst;
4125         CallInfo *cinfo = get_call_info (cfg, cfg->mempool, inst->signature, FALSE);
4126
4127         /* add the this argument */
4128         if (this_reg != -1) {
4129                 if (cinfo->args [0].storage == ArgInIReg) {
4130                         MonoInst *this;
4131                         MONO_INST_NEW (cfg, this, OP_MOVE);
4132                         this->type = this_type;
4133                         this->sreg1 = this_reg;
4134                         this->dreg = mono_regstate_next_int (cfg->rs);
4135                         mono_bblock_add_inst (cfg->cbb, this);
4136
4137                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
4138                 }
4139                 else {
4140                         MonoInst *this;
4141                         MONO_INST_NEW (cfg, this, OP_OUTARG);
4142                         this->type = this_type;
4143                         this->sreg1 = this_reg;
4144                         mono_bblock_add_inst (cfg->cbb, this);
4145                 }
4146         }
4147
4148         if (vt_reg != -1) {
4149                 MonoInst *vtarg;
4150
4151                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4152                         /*
4153                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4154                          * the stack. Save the address here, so the call instruction can
4155                          * access it.
4156                          */
4157                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4158                         vtarg->inst_destbasereg = X86_ESP;
4159                         vtarg->inst_offset = inst->stack_usage;
4160                         vtarg->sreg1 = vt_reg;
4161                         mono_bblock_add_inst (cfg->cbb, vtarg);
4162                 }
4163                 else if (cinfo->ret.storage == ArgInIReg) {
4164                         /* The return address is passed in a register */
4165                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
4166                         vtarg->sreg1 = vt_reg;
4167                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
4168                         mono_bblock_add_inst (cfg->cbb, vtarg);
4169
4170                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
4171                 } else {
4172                         MonoInst *vtarg;
4173                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4174                         vtarg->type = STACK_MP;
4175                         vtarg->sreg1 = vt_reg;
4176                         mono_bblock_add_inst (cfg->cbb, vtarg);
4177                 }
4178         }
4179 }
4180
4181 #ifdef MONO_ARCH_HAVE_IMT
4182
4183 // Linear handler, the bsearch head compare is shorter
4184 //[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
4185 //[1 + 1] x86_branch8(inst,cond,imm,is_signed)
4186 //        x86_patch(ins,target)
4187 //[1 + 5] x86_jump_mem(inst,mem)
4188
4189 #define CMP_SIZE 6
4190 #define BR_SMALL_SIZE 2
4191 #define BR_LARGE_SIZE 5
4192 #define JUMP_IMM_SIZE 6
4193 #define ENABLE_WRONG_METHOD_CHECK 0
4194
4195 static int
4196 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
4197 {
4198         int i, distance = 0;
4199         for (i = start; i < target; ++i)
4200                 distance += imt_entries [i]->chunk_size;
4201         return distance;
4202 }
4203
4204 /*
4205  * LOCKING: called with the domain lock held
4206  */
4207 gpointer
4208 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
4209 {
4210         int i;
4211         int size = 0;
4212         guint8 *code, *start;
4213
4214         for (i = 0; i < count; ++i) {
4215                 MonoIMTCheckItem *item = imt_entries [i];
4216                 if (item->is_equals) {
4217                         if (item->check_target_idx) {
4218                                 if (!item->compare_done)
4219                                         item->chunk_size += CMP_SIZE;
4220                                 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
4221                         } else {
4222                                 item->chunk_size += JUMP_IMM_SIZE;
4223 #if ENABLE_WRONG_METHOD_CHECK
4224                                 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
4225 #endif
4226                         }
4227                 } else {
4228                         item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
4229                         imt_entries [item->check_target_idx]->compare_done = TRUE;
4230                 }
4231                 size += item->chunk_size;
4232         }
4233         code = mono_code_manager_reserve (domain->code_mp, size);
4234         start = code;
4235         for (i = 0; i < count; ++i) {
4236                 MonoIMTCheckItem *item = imt_entries [i];
4237                 item->code_target = code;
4238                 if (item->is_equals) {
4239                         if (item->check_target_idx) {
4240                                 if (!item->compare_done)
4241                                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4242                                 item->jmp_code = code;
4243                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);
4244                                 x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
4245                         } else {
4246                                 /* enable the commented code to assert on wrong method */
4247 #if ENABLE_WRONG_METHOD_CHECK
4248                                 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4249                                 item->jmp_code = code;
4250                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);
4251 #endif
4252                                 x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
4253 #if ENABLE_WRONG_METHOD_CHECK
4254                                 x86_patch (item->jmp_code, code);
4255                                 x86_breakpoint (code);
4256                                 item->jmp_code = NULL;
4257 #endif
4258                         }
4259                 } else {
4260                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4261                         item->jmp_code = code;
4262                         if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
4263                                 x86_branch8 (code, X86_CC_GE, 0, FALSE);
4264                         else
4265                                 x86_branch32 (code, X86_CC_GE, 0, FALSE);
4266                 }
4267         }
4268         /* patch the branches to get to the target items */
4269         for (i = 0; i < count; ++i) {
4270                 MonoIMTCheckItem *item = imt_entries [i];
4271                 if (item->jmp_code) {
4272                         if (item->check_target_idx) {
4273                                 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
4274                         }
4275                 }
4276         }
4277                 
4278         mono_stats.imt_thunks_size += code - start;
4279         g_assert (code - start <= size);
4280         return start;
4281 }
4282
4283 MonoMethod*
4284 mono_arch_find_imt_method (gpointer *regs, guint8 *code)
4285 {
4286         return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
4287 }
4288
/*
 * mono_arch_find_this_argument:
 *
 *   Return the 'this' argument of a call to METHOD, read from the stack
 * state saved by the trampoline (REGS [X86_ESP] points at the saved frame).
 */
MonoObject*
mono_arch_find_this_argument (gpointer *regs, MonoMethod *method)
{
	MonoMethodSignature *sig = mono_method_signature (method);
	CallInfo *cinfo = get_call_info (NULL, NULL, sig, FALSE);
	int this_argument_offset;
	MonoObject *this_argument;

	/* 
	 * this is the offset of the this arg from esp as saved at the start of 
	 * mono_arch_create_trampoline_code () in tramp-x86.c.
	 */
	this_argument_offset = 5;
	/* A stack-returned valuetype adds a hidden return-address slot before 'this' */
	if (MONO_TYPE_ISSTRUCT (sig->ret) && (cinfo->ret.storage == ArgOnStack))
		this_argument_offset++;

	this_argument = * (MonoObject**) (((guint8*) regs [X86_ESP]) + this_argument_offset * sizeof (gpointer));

	/* cinfo was allocated without a mempool, so free it explicitly */
	g_free (cinfo);
	return this_argument;
}
4310 #endif
4311
4312 MonoInst*
4313 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4314 {
4315         MonoInst *ins = NULL;
4316
4317         if (cmethod->klass == mono_defaults.math_class) {
4318                 if (strcmp (cmethod->name, "Sin") == 0) {
4319                         MONO_INST_NEW (cfg, ins, OP_SIN);
4320                         ins->inst_i0 = args [0];
4321                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4322                         MONO_INST_NEW (cfg, ins, OP_COS);
4323                         ins->inst_i0 = args [0];
4324                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4325                         MONO_INST_NEW (cfg, ins, OP_TAN);
4326                         ins->inst_i0 = args [0];
4327                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4328                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4329                         ins->inst_i0 = args [0];
4330                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4331                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4332                         ins->inst_i0 = args [0];
4333                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4334                         MONO_INST_NEW (cfg, ins, OP_ABS);
4335                         ins->inst_i0 = args [0];
4336                 }
4337 #if 0
4338                 /* OP_FREM is not IEEE compatible */
4339                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4340                         MONO_INST_NEW (cfg, ins, OP_FREM);
4341                         ins->inst_i0 = args [0];
4342                         ins->inst_i1 = args [1];
4343                 }
4344 #endif
4345         } else if (cmethod->klass == mono_defaults.thread_class &&
4346                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
4347                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
4348         } else if(cmethod->klass->image == mono_defaults.corlib &&
4349                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4350                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4351
4352                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4353                         MonoInst *ins_iconst;
4354
4355                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4356                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4357                         ins_iconst->inst_c0 = 1;
4358
4359                         ins->inst_i0 = args [0];
4360                         ins->inst_i1 = ins_iconst;
4361                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4362                         MonoInst *ins_iconst;
4363
4364                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4365                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4366                         ins_iconst->inst_c0 = -1;
4367
4368                         ins->inst_i0 = args [0];
4369                         ins->inst_i1 = ins_iconst;
4370                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4371                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4372
4373                         ins->inst_i0 = args [0];
4374                         ins->inst_i1 = args [1];
4375                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4376                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4377
4378                         ins->inst_i0 = args [0];
4379                         ins->inst_i1 = args [1];
4380                 }
4381         }
4382
4383         return ins;
4384 }
4385
4386
/*
 * mono_arch_print_tree:
 *
 *   Hook for printing arch-specific opcodes in TREE; always returns 0
 * since nothing arch-specific is handled here.
 */
gboolean
mono_arch_print_tree (MonoInst *tree, int arity)
{
	return 0;
}
4392
4393 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4394 {
4395         MonoInst* ins;
4396         
4397         if (appdomain_tls_offset == -1)
4398                 return NULL;
4399
4400         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4401         ins->inst_offset = appdomain_tls_offset;
4402         return ins;
4403 }
4404
4405 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4406 {
4407         MonoInst* ins;
4408
4409         if (thread_tls_offset == -1)
4410                 return NULL;
4411
4412         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4413         ins->inst_offset = thread_tls_offset;
4414         return ins;
4415 }
4416
/*
 * mono_arch_get_patch_offset:
 *
 *   Return the offset, within the instruction starting at CODE, of the
 * immediate/displacement which needs to be patched.
 */
guint32
mono_arch_get_patch_offset (guint8 *code)
{
	if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
		/* mov <REG>, <OFFSET>(<REG>) with a 32 bit displacement */
		return 2;
	else if ((code [0] == 0xba))
		/* mov edx, IMM */
		return 1;
	else if ((code [0] == 0x68))
		/* push IMM */
		return 1;
	else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
		/* push <OFFSET>(<REG>) */
		return 2;
	else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
		/* call *<OFFSET>(<REG>) */
		return 2;
	else if ((code [0] == 0xdd) || (code [0] == 0xd9))
		/* fldl <ADDR> */
		return 2;
	else if ((code [0] == 0x58) && (code [1] == 0x05))
		/* pop %eax; add <OFFSET>, %eax */
		return 2;
	else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
		/* pop <REG>; add <OFFSET>, <REG> */
		/* NOTE(review): pop-reg opcodes span 0x58..0x5f; the inclusive
		 * '<= 0x58 + X86_NREG' bound also accepts 0x60 — confirm whether
		 * the upper bound was meant to be exclusive. */
		return 3;
	else {
		g_assert_not_reached ();
		return -1;
	}
}
4447
/*
 * mono_arch_get_vcall_slot:
 *
 *   Decode the indirect call instruction ending at CODE and return the
 * saved register value used as the call's base address, storing the call's
 * displacement into DISPLACEMENT. Returns NULL for call sequences which are
 * not vtable calls (e.g. direct calls, opcode 0xe8).
 */
gpointer
mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
{
	guint8 reg = 0;
	gint32 disp = 0;

	*displacement = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;

	/* 
	 * A given byte sequence can match more than case here, so we have to be
	 * really careful about the ordering of the cases. Longer sequences
	 * come first.
	 */
	if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
		/*
		 * This is an interface call
		 * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
		 * ff 10                   call   *(%eax)
		 */
		reg = x86_modrm_rm (code [5]);
		disp = 0;
#ifdef MONO_ARCH_HAVE_IMT
	} else if ((code [-2] == 0xba) && (code [3] == 0xff) && (x86_modrm_mod (code [4]) == 1) && (x86_modrm_reg (code [4]) == 2) && ((signed char)code [5] < 0)) {
		/* IMT-based interface calls: with MONO_ARCH_IMT_REG == edx
		 * ba 14 f8 28 08          mov    $0x828f814,%edx
		 * ff 50 fc                call   *0xfffffffc(%eax)
		 */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
#endif
	} else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *<OFFSET8>(<REG>) */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *<OFFSET32>(<REG>) */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* Direct call, no vtable slot */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	*displacement = disp;
	return regs [reg];
}
4512
4513 gpointer*
4514 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
4515 {
4516         gpointer vt;
4517         int displacement;
4518         vt = mono_arch_get_vcall_slot (code, regs, &displacement);
4519         if (!vt)
4520                 return NULL;
4521         return (gpointer*)((char*)vt + displacement);
4522 }
4523
/*
 * mono_arch_get_this_arg_from_call:
 *
 *   Return the 'this' (delegate) argument of the call whose saved stack
 * state is in REGS, using the argument layout computed for SIG.
 */
gpointer
mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code)
{
	guint32 esp = regs [X86_ESP];
	CallInfo *cinfo;
	gpointer res;

	cinfo = get_call_info (NULL, NULL, sig, FALSE);

	/*
	 * The stack looks like:
	 * <other args>
	 * <this=delegate>
	 * <possible vtype return address>
	 * <return addr>
	 * <4 pointers pushed by mono_arch_create_trampoline_code ()>
	 */
	res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]);
	/* cinfo was allocated without a mempool, so free it explicitly */
	g_free (cinfo);
	return res;
}
4545
4546 #define MAX_ARCH_DELEGATE_PARAMS 10
4547
/*
 * mono_arch_get_delegate_invoke_impl:
 *
 *   Return a small shared trampoline implementing delegate invocation for
 * SIG, or NULL when no specialized implementation is available (too many
 * parameters, valuetype return, or non-regsize parameters in the static
 * case). Generated code is cached — one thunk for the has_target case, one
 * per parameter count otherwise — under the arch mutex.
 */
gpointer
mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
{
	guint8 *code, *start;

	if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
		return NULL;

	/* FIXME: Support more cases */
	if (MONO_TYPE_ISSTRUCT (sig->ret))
		return NULL;

	/*
	 * The stack contains:
	 * <delegate>
	 * <return addr>
	 */

	if (has_target) {
		static guint8* cached = NULL;
		mono_mini_arch_lock ();
		if (cached) {
			mono_mini_arch_unlock ();
			return cached;
		}
		
		start = code = mono_global_codeman_reserve (64);

		/* Replace the this argument with the target */
		x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
		x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
		x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
		x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));

		g_assert ((code - start) < 64);

		cached = start;

		mono_mini_arch_unlock ();
	} else {
		static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
		int i = 0;
		/* 8 for mov_reg and jump, plus 8 for each parameter */
		int code_reserve = 8 + (sig->param_count * 8);

		/* Only pointer-sized parameters can be shifted with plain moves */
		for (i = 0; i < sig->param_count; ++i)
			if (!mono_is_regsize_var (sig->params [i]))
				return NULL;

		mono_mini_arch_lock ();
		code = cache [sig->param_count];
		if (code) {
			mono_mini_arch_unlock ();
			return code;
		}

		/*
		 * The stack contains:
		 * <args in reverse order>
		 * <delegate>
		 * <return addr>
		 *
		 * and we need:
		 * <args in reverse order>
		 * <return addr>
		 * 
		 * without unbalancing the stack.
		 * So move each arg up a spot in the stack (overwriting un-needed 'this' arg)
		 * and leaving original spot of first arg as placeholder in stack so
		 * when callee pops stack everything works.
		 */

		start = code = mono_global_codeman_reserve (code_reserve);

		/* store delegate for access to method_ptr */
		x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);

		/* move args up */
		for (i = 0; i < sig->param_count; ++i) {
			x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
			x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
		}

		x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));

		g_assert ((code - start) < code_reserve);

		cache [sig->param_count] = start;

		mono_mini_arch_unlock ();
	}

	return start;
}