2007-10-10 Mark Probst <mark.probst@gmail.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/debug-helpers.h>
20 #include <mono/metadata/threads.h>
21 #include <mono/metadata/profiler-private.h>
22 #include <mono/utils/mono-math.h>
23
24 #include "trace.h"
25 #include "mini-x86.h"
26 #include "inssel.h"
27 #include "cpu-x86.h"
28
/*
 * TLS offsets used by the backend. All of them start out as -1.
 * On windows, these hold the key returned by TlsAlloc ()
 */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

/*
 * optimize_for_xen is a real variable only when MONO_XEN_OPT is defined;
 * otherwise it is the compile time constant 0.
 */
#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

/* is_win32 records whether PLATFORM_WIN32 was defined at compile time */
#ifdef PLATFORM_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif

/*
 * This mutex protects architecture specific caches.
 * It is set up in mono_arch_init () and destroyed in mono_arch_cleanup ().
 */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;
51
/*
 * ALIGN_TO:
 *
 *   Round VAL up to the next multiple of ALIGN, which must be a power of two.
 * VAL is parenthesized before the cast so that an expression argument is
 * evaluated as a whole and only then widened to guint64. The result has
 * type guint64.
 */
#define ALIGN_TO(val,align) ((((guint64)(val)) + ((align) - 1)) & ~((align) - 1))
53
/*
 * Added to an ArgInfo offset to obtain the EBP relative offset of an
 * incoming argument (see mono_arch_allocate_vars ()).
 */
#define ARGS_OFFSET 8

/*
 * CALLCONV_IS_STDCALL: true if SIG uses the stdcall calling convention.
 */
#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

/* Marks code paths which are not implemented: expands to g_assert_not_reached () */
#define NOT_IMPLEMENTED g_assert_not_reached ()
64
65 const char*
66 mono_arch_regname (int reg) {
67         switch (reg) {
68         case X86_EAX: return "%eax";
69         case X86_EBX: return "%ebx";
70         case X86_ECX: return "%ecx";
71         case X86_EDX: return "%edx";
72         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
73         case X86_EDI: return "%edi";
74         case X86_ESI: return "%esi";
75         }
76         return "unknown";
77 }
78
/*
 * mono_arch_fregname:
 *
 *   Return the printable name of the floating point register REG.
 * No names are provided on x86, so the result is always "unknown".
 */
const char*
mono_arch_fregname (int reg)
{
	/* REG is intentionally ignored */
	(void) reg;

	return "unknown";
}
83
/*
 * ArgStorage:
 *
 *   Describes where an argument or a return value lives.
 */
typedef enum {
	ArgInIReg,		/* in the integer register named by ArgInfo.reg */
	ArgInFloatSSEReg,	/* single precision value in an SSE register */
	ArgInDoubleSSEReg,	/* double precision value in an SSE register */
	ArgOnStack,		/* on the stack, at ArgInfo.offset */
	ArgValuetypeInReg,	/* valuetype described by ArgInfo.pair_storage/pair_regs */
	ArgOnFloatFpStack,	/* single precision value on the fp stack */
	ArgOnDoubleFpStack,	/* double precision value on the fp stack */
	ArgNone			/* no value (void return, unused pair slot) */
} ArgStorage;
94
/*
 * ArgInfo:
 *
 *   Location of a single argument or of the return value.
 */
typedef struct {
	/* Value of the running stack size at the time the argument was placed */
	gint16 offset;
	/* Register number, meaningful for the register storage kinds */
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;
104
/*
 * CallInfo:
 *
 *   Result of get_call_info (): where the return value and every argument
 * of a signature are located.
 */
typedef struct {
	/* NOTE(review): never written by get_call_info () */
	int nargs;
	/* Bytes of stack used by the arguments */
	guint32 stack_usage;
	/* Final values of the integer/float register counters */
	guint32 reg_usage;
	guint32 freg_usage;
	/* Only set under __APPLE__, when the stack size is not a multiple of 16 */
	gboolean need_stack_align;
	guint32 stack_align_amount;
	ArgInfo ret;
	ArgInfo sig_cookie;
	/*
	 * One entry per argument, 'this' first if present. get_call_info ()
	 * over-allocates the structure to make room for all of them.
	 */
	ArgInfo args [1];
} CallInfo;
116
/* Number of integer registers used for passing arguments: none */
#define PARAM_REGS 0

/* Number of float registers used for passing arguments: none */
#define FLOAT_PARAM_REGS 0

/* Indexed by add_general () when an argument is assigned to a register */
static X86_Reg_No param_regs [] = { 0 };

/*
 * On these platforms small structs are returned in registers by pinvoke
 * methods, see add_valuetype ().
 */
#if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
127
128 static void inline
129 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
130 {
131     ainfo->offset = *stack_size;
132
133     if (*gr >= PARAM_REGS) {
134                 ainfo->storage = ArgOnStack;
135                 (*stack_size) += sizeof (gpointer);
136     }
137     else {
138                 ainfo->storage = ArgInIReg;
139                 ainfo->reg = param_regs [*gr];
140                 (*gr) ++;
141     }
142 }
143
144 static void inline
145 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
146 {
147         ainfo->offset = *stack_size;
148
149         g_assert (PARAM_REGS == 0);
150         
151         ainfo->storage = ArgOnStack;
152         (*stack_size) += sizeof (gpointer) * 2;
153 }
154
155 static void inline
156 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
157 {
158     ainfo->offset = *stack_size;
159
160     if (*gr >= FLOAT_PARAM_REGS) {
161                 ainfo->storage = ArgOnStack;
162                 (*stack_size) += is_double ? 8 : 4;
163     }
164     else {
165                 /* A double register */
166                 if (is_double)
167                         ainfo->storage = ArgInDoubleSSEReg;
168                 else
169                         ainfo->storage = ArgInFloatSSEReg;
170                 ainfo->reg = *gr;
171                 (*gr) += 1;
172     }
173 }
174
175
176 static void
177 add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
178                gboolean is_return,
179                guint32 *gr, guint32 *fr, guint32 *stack_size)
180 {
181         guint32 size;
182         MonoClass *klass;
183
184         klass = mono_class_from_mono_type (type);
185         if (sig->pinvoke) 
186                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
187         else 
188                 size = mono_type_stack_size (&klass->byval_arg, NULL);
189
190 #ifdef SMALL_STRUCTS_IN_REGS
191         if (sig->pinvoke && is_return) {
192                 MonoMarshalType *info;
193
194                 /*
195                  * the exact rules are not very well documented, the code below seems to work with the 
196                  * code generated by gcc 3.3.3 -mno-cygwin.
197                  */
198                 info = mono_marshal_load_type_info (klass);
199                 g_assert (info);
200
201                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
202
203                 /* Special case structs with only a float member */
204                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
205                         ainfo->storage = ArgValuetypeInReg;
206                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
207                         return;
208                 }
209                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
210                         ainfo->storage = ArgValuetypeInReg;
211                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
212                         return;
213                 }               
214                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
215                         ainfo->storage = ArgValuetypeInReg;
216                         ainfo->pair_storage [0] = ArgInIReg;
217                         ainfo->pair_regs [0] = return_regs [0];
218                         if (info->native_size > 4) {
219                                 ainfo->pair_storage [1] = ArgInIReg;
220                                 ainfo->pair_regs [1] = return_regs [1];
221                         }
222                         return;
223                 }
224         }
225 #endif
226
227         ainfo->offset = *stack_size;
228         ainfo->storage = ArgOnStack;
229         *stack_size += ALIGN_TO (size, sizeof (gpointer));
230 }
231
232 /*
233  * get_call_info:
234  *
235  *  Obtain information about a call according to the calling convention.
236  * For x86 ELF, see the "System V Application Binary Interface Intel386 
237  * Architecture Processor Supplment, Fourth Edition" document for more
238  * information.
239  * For x86 win32, see ???.
240  */
241 static CallInfo*
242 get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
243 {
244         guint32 i, gr, fr;
245         MonoType *ret_type;
246         int n = sig->hasthis + sig->param_count;
247         guint32 stack_size = 0;
248         CallInfo *cinfo;
249
250         if (mp)
251                 cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
252         else
253                 cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
254
255         gr = 0;
256         fr = 0;
257
258         /* return value */
259         {
260                 ret_type = mono_type_get_underlying_type (sig->ret);
261                 ret_type = mini_get_basic_type_from_generic (cfg, ret_type);
262                 switch (ret_type->type) {
263                 case MONO_TYPE_BOOLEAN:
264                 case MONO_TYPE_I1:
265                 case MONO_TYPE_U1:
266                 case MONO_TYPE_I2:
267                 case MONO_TYPE_U2:
268                 case MONO_TYPE_CHAR:
269                 case MONO_TYPE_I4:
270                 case MONO_TYPE_U4:
271                 case MONO_TYPE_I:
272                 case MONO_TYPE_U:
273                 case MONO_TYPE_PTR:
274                 case MONO_TYPE_FNPTR:
275                 case MONO_TYPE_CLASS:
276                 case MONO_TYPE_OBJECT:
277                 case MONO_TYPE_SZARRAY:
278                 case MONO_TYPE_ARRAY:
279                 case MONO_TYPE_STRING:
280                         cinfo->ret.storage = ArgInIReg;
281                         cinfo->ret.reg = X86_EAX;
282                         break;
283                 case MONO_TYPE_U8:
284                 case MONO_TYPE_I8:
285                         cinfo->ret.storage = ArgInIReg;
286                         cinfo->ret.reg = X86_EAX;
287                         break;
288                 case MONO_TYPE_R4:
289                         cinfo->ret.storage = ArgOnFloatFpStack;
290                         break;
291                 case MONO_TYPE_R8:
292                         cinfo->ret.storage = ArgOnDoubleFpStack;
293                         break;
294                 case MONO_TYPE_GENERICINST:
295                         if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
296                                 cinfo->ret.storage = ArgInIReg;
297                                 cinfo->ret.reg = X86_EAX;
298                                 break;
299                         }
300                         /* Fall through */
301                 case MONO_TYPE_VALUETYPE: {
302                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
303
304                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
305                         if (cinfo->ret.storage == ArgOnStack)
306                                 /* The caller passes the address where the value is stored */
307                                 add_general (&gr, &stack_size, &cinfo->ret);
308                         break;
309                 }
310                 case MONO_TYPE_TYPEDBYREF:
311                         /* Same as a valuetype with size 24 */
312                         add_general (&gr, &stack_size, &cinfo->ret);
313                         ;
314                         break;
315                 case MONO_TYPE_VOID:
316                         cinfo->ret.storage = ArgNone;
317                         break;
318                 default:
319                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
320                 }
321         }
322
323         /* this */
324         if (sig->hasthis)
325                 add_general (&gr, &stack_size, cinfo->args + 0);
326
327         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
328                 gr = PARAM_REGS;
329                 fr = FLOAT_PARAM_REGS;
330                 
331                 /* Emit the signature cookie just before the implicit arguments */
332                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
333         }
334
335         for (i = 0; i < sig->param_count; ++i) {
336                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
337                 MonoType *ptype;
338
339                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
340                         /* We allways pass the sig cookie on the stack for simplicity */
341                         /* 
342                          * Prevent implicit arguments + the sig cookie from being passed 
343                          * in registers.
344                          */
345                         gr = PARAM_REGS;
346                         fr = FLOAT_PARAM_REGS;
347
348                         /* Emit the signature cookie just before the implicit arguments */
349                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
350                 }
351
352                 if (sig->params [i]->byref) {
353                         add_general (&gr, &stack_size, ainfo);
354                         continue;
355                 }
356                 ptype = mono_type_get_underlying_type (sig->params [i]);
357                 ptype = mini_get_basic_type_from_generic (cfg, ptype);
358                 switch (ptype->type) {
359                 case MONO_TYPE_BOOLEAN:
360                 case MONO_TYPE_I1:
361                 case MONO_TYPE_U1:
362                         add_general (&gr, &stack_size, ainfo);
363                         break;
364                 case MONO_TYPE_I2:
365                 case MONO_TYPE_U2:
366                 case MONO_TYPE_CHAR:
367                         add_general (&gr, &stack_size, ainfo);
368                         break;
369                 case MONO_TYPE_I4:
370                 case MONO_TYPE_U4:
371                         add_general (&gr, &stack_size, ainfo);
372                         break;
373                 case MONO_TYPE_I:
374                 case MONO_TYPE_U:
375                 case MONO_TYPE_PTR:
376                 case MONO_TYPE_FNPTR:
377                 case MONO_TYPE_CLASS:
378                 case MONO_TYPE_OBJECT:
379                 case MONO_TYPE_STRING:
380                 case MONO_TYPE_SZARRAY:
381                 case MONO_TYPE_ARRAY:
382                         add_general (&gr, &stack_size, ainfo);
383                         break;
384                 case MONO_TYPE_GENERICINST:
385                         if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
386                                 add_general (&gr, &stack_size, ainfo);
387                                 break;
388                         }
389                         /* Fall through */
390                 case MONO_TYPE_VALUETYPE:
391                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
392                         break;
393                 case MONO_TYPE_TYPEDBYREF:
394                         stack_size += sizeof (MonoTypedRef);
395                         ainfo->storage = ArgOnStack;
396                         break;
397                 case MONO_TYPE_U8:
398                 case MONO_TYPE_I8:
399                         add_general_pair (&gr, &stack_size, ainfo);
400                         break;
401                 case MONO_TYPE_R4:
402                         add_float (&fr, &stack_size, ainfo, FALSE);
403                         break;
404                 case MONO_TYPE_R8:
405                         add_float (&fr, &stack_size, ainfo, TRUE);
406                         break;
407                 default:
408                         g_error ("unexpected type 0x%x", ptype->type);
409                         g_assert_not_reached ();
410                 }
411         }
412
413         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
414                 gr = PARAM_REGS;
415                 fr = FLOAT_PARAM_REGS;
416                 
417                 /* Emit the signature cookie just before the implicit arguments */
418                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
419         }
420
421 #if defined(__APPLE__)
422         if ((stack_size % 16) != 0) { 
423                 cinfo->need_stack_align = TRUE;
424                 stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
425         }
426 #endif
427
428         cinfo->stack_usage = stack_size;
429         cinfo->reg_usage = gr;
430         cinfo->freg_usage = fr;
431         return cinfo;
432 }
433
434 /*
435  * mono_arch_get_argument_info:
436  * @csig:  a method signature
437  * @param_count: the number of parameters to consider
438  * @arg_info: an array to store the result infos
439  *
440  * Gathers information on parameters such as size, alignment and
441  * padding. arg_info should be large enought to hold param_count + 1 entries. 
442  *
443  * Returns the size of the activation frame.
444  */
445 int
446 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
447 {
448         int k, frame_size = 0;
449         int size, pad;
450         guint32 align;
451         int offset = 8;
452         CallInfo *cinfo;
453
454         cinfo = get_call_info (NULL, NULL, csig, FALSE);
455
456         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
457                 frame_size += sizeof (gpointer);
458                 offset += 4;
459         }
460
461         arg_info [0].offset = offset;
462
463         if (csig->hasthis) {
464                 frame_size += sizeof (gpointer);
465                 offset += 4;
466         }
467
468         arg_info [0].size = frame_size;
469
470         for (k = 0; k < param_count; k++) {
471                 
472                 if (csig->pinvoke)
473                         size = mono_type_native_stack_size (csig->params [k], &align);
474                 else {
475                         int ialign;
476                         size = mono_type_stack_size (csig->params [k], &ialign);
477                         align = ialign;
478                 }
479
480                 /* ignore alignment for now */
481                 align = 1;
482
483                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
484                 arg_info [k].pad = pad;
485                 frame_size += size;
486                 arg_info [k + 1].pad = 0;
487                 arg_info [k + 1].size = size;
488                 offset += pad;
489                 arg_info [k + 1].offset = offset;
490                 offset += size;
491         }
492
493         align = MONO_ARCH_FRAME_ALIGNMENT;
494         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
495         arg_info [k].pad = pad;
496
497         g_free (cinfo);
498
499         return frame_size;
500 }
501
/*
 * Machine code of a function with the CpuidFunc signature: it loads its
 * first argument into %eax, executes cpuid and stores %eax, %ebx, %ecx
 * and %edx through the four pointer arguments. %ebx is preserved.
 * cpuid () copies these bytes into memory obtained from the code manager
 * before calling them.
 */
static const guchar cpuid_impl [] = {
	0x55,                   	/* push   %ebp */
	0x89, 0xe5,             	/* mov    %esp,%ebp */
	0x53,                   	/* push   %ebx */
	0x8b, 0x45, 0x08,       	/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,             	/* cpuid   */
	0x50,                   	/* push   %eax */
	0x8b, 0x45, 0x10,       	/* mov    0x10(%ebp),%eax */
	0x89, 0x18,             	/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,       	/* mov    0x14(%ebp),%eax */
	0x89, 0x08,             	/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,       	/* mov    0x18(%ebp),%eax */
	0x89, 0x10,             	/* mov    %edx,(%eax) */
	0x58,                   	/* pop    %eax */
	0x8b, 0x55, 0x0c,       	/* mov    0xc(%ebp),%edx */
	0x89, 0x02,             	/* mov    %eax,(%edx) */
	0x5b,                   	/* pop    %ebx */
	0xc9,                   	/* leave   */
	0xc3,                   	/* ret     */
};

/* Signature of the code stored in cpuid_impl */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
524
525 static int 
526 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
527 {
528         int have_cpuid = 0;
529 #ifndef _MSC_VER
530         __asm__  __volatile__ (
531                 "pushfl\n"
532                 "popl %%eax\n"
533                 "movl %%eax, %%edx\n"
534                 "xorl $0x200000, %%eax\n"
535                 "pushl %%eax\n"
536                 "popfl\n"
537                 "pushfl\n"
538                 "popl %%eax\n"
539                 "xorl %%edx, %%eax\n"
540                 "andl $0x200000, %%eax\n"
541                 "movl %%eax, %0"
542                 : "=r" (have_cpuid)
543                 :
544                 : "%eax", "%edx"
545         );
546 #else
547         __asm {
548                 pushfd
549                 pop eax
550                 mov edx, eax
551                 xor eax, 0x200000
552                 push eax
553                 popfd
554                 pushfd
555                 pop eax
556                 xor eax, edx
557                 and eax, 0x200000
558                 mov have_cpuid, eax
559         }
560 #endif
561         if (have_cpuid) {
562                 /* Have to use the code manager to get around WinXP DEP */
563                 static CpuidFunc func = NULL;
564                 void *ptr;
565                 if (!func) {
566                         ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
567                         memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
568                         func = (CpuidFunc)ptr;
569                 }
570                 func (id, p_eax, p_ebx, p_ecx, p_edx);
571
572                 /*
573                  * We use this approach because of issues with gcc and pic code, see:
574                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
575                 __asm__ __volatile__ ("cpuid"
576                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
577                         : "a" (id));
578                 */
579                 return 1;
580         }
581         return 0;
582 }
583
584 /*
585  * Initialize the cpu to execute managed code.
586  */
587 void
588 mono_arch_cpu_init (void)
589 {
590         /* spec compliance requires running with double precision */
591 #ifndef _MSC_VER
592         guint16 fpcw;
593
594         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
595         fpcw &= ~X86_FPCW_PRECC_MASK;
596         fpcw |= X86_FPCW_PREC_DOUBLE;
597         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
598         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
599 #else
600         _control87 (_PC_53, MCW_PC);
601 #endif
602 }
603
/*
 * mono_arch_init:
 *
 *   Initialize architecture specific code. This sets up mini_arch_mutex,
 * the critical section used by mono_mini_arch_lock ()/unlock ().
 */
void
mono_arch_init (void)
{
	InitializeCriticalSection (&mini_arch_mutex);
}
612
/*
 * mono_arch_cleanup:
 *
 *   Cleanup architecture specific code. This destroys mini_arch_mutex,
 * the critical section set up by mono_arch_init ().
 */
void
mono_arch_cleanup (void)
{
	DeleteCriticalSection (&mini_arch_mutex);
}
621
622 /*
623  * This function returns the optimizations supported on this cpu.
624  */
625 guint32
626 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
627 {
628         int eax, ebx, ecx, edx;
629         guint32 opts = 0;
630         
631         *exclude_mask = 0;
632         /* Feature Flags function, flags returned in EDX. */
633         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
634                 if (edx & (1 << 15)) {
635                         opts |= MONO_OPT_CMOV;
636                         if (edx & 1)
637                                 opts |= MONO_OPT_FCMOV;
638                         else
639                                 *exclude_mask |= MONO_OPT_FCMOV;
640                 } else
641                         *exclude_mask |= MONO_OPT_CMOV;
642                 if (edx & (1 << 26))
643                         opts |= MONO_OPT_SSE2;
644                 else
645                         *exclude_mask |= MONO_OPT_SSE2;
646         }
647         return opts;
648 }
649
650 /*
651  * Determine whenever the trap whose info is in SIGINFO is caused by
652  * integer overflow.
653  */
654 gboolean
655 mono_arch_is_int_overflow (void *sigctx, void *info)
656 {
657         MonoContext ctx;
658         guint8* ip;
659
660         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
661
662         ip = (guint8*)ctx.eip;
663
664         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
665                 gint32 reg;
666
667                 /* idiv REG */
668                 switch (x86_modrm_rm (ip [1])) {
669                 case X86_EAX:
670                         reg = ctx.eax;
671                         break;
672                 case X86_ECX:
673                         reg = ctx.ecx;
674                         break;
675                 case X86_EDX:
676                         reg = ctx.edx;
677                         break;
678                 case X86_EBX:
679                         reg = ctx.ebx;
680                         break;
681                 case X86_ESI:
682                         reg = ctx.esi;
683                         break;
684                 case X86_EDI:
685                         reg = ctx.edi;
686                         break;
687                 default:
688                         g_assert_not_reached ();
689                         reg = -1;
690                 }
691
692                 if (reg == -1)
693                         return TRUE;
694         }
695                         
696         return FALSE;
697 }
698
699 GList *
700 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
701 {
702         GList *vars = NULL;
703         int i;
704
705         for (i = 0; i < cfg->num_varinfo; i++) {
706                 MonoInst *ins = cfg->varinfo [i];
707                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
708
709                 /* unused vars */
710                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
711                         continue;
712
713                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
714                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
715                         continue;
716
717                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
718                  * 8bit quantities in caller saved registers on x86 */
719                 if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
720                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
721                         g_assert (i == vmv->idx);
722                         vars = g_list_prepend (vars, vmv);
723                 }
724         }
725
726         vars = mono_varlist_sort (cfg, vars, 0);
727
728         return vars;
729 }
730
731 GList *
732 mono_arch_get_global_int_regs (MonoCompile *cfg)
733 {
734         GList *regs = NULL;
735
736         /* we can use 3 registers for global allocation */
737         regs = g_list_prepend (regs, (gpointer)X86_EBX);
738         regs = g_list_prepend (regs, (gpointer)X86_ESI);
739         regs = g_list_prepend (regs, (gpointer)X86_EDI);
740
741         return regs;
742 }
743
744 /*
745  * mono_arch_regalloc_cost:
746  *
747  *  Return the cost, in number of memory references, of the action of 
748  * allocating the variable VMV into a register during global register
749  * allocation.
750  */
751 guint32
752 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
753 {
754         MonoInst *ins = cfg->varinfo [vmv->idx];
755
756         if (cfg->method->save_lmf)
757                 /* The register is already saved */
758                 return (ins->opcode == OP_ARG) ? 1 : 0;
759         else
760                 /* push+pop+possible load if it is an argument */
761                 return (ins->opcode == OP_ARG) ? 3 : 2;
762 }
763  
764 /*
765  * Set var information according to the calling convention. X86 version.
766  * The locals var stuff should most likely be split in another method.
767  */
768 void
769 mono_arch_allocate_vars (MonoCompile *cfg)
770 {
771         MonoMethodSignature *sig;
772         MonoMethodHeader *header;
773         MonoInst *inst;
774         guint32 locals_stack_size, locals_stack_align;
775         int i, offset;
776         gint32 *offsets;
777         CallInfo *cinfo;
778
779         header = mono_method_get_header (cfg->method);
780         sig = mono_method_signature (cfg->method);
781
782         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
783
784         cfg->frame_reg = MONO_ARCH_BASEREG;
785         offset = 0;
786
787         /* Reserve space to save LMF and caller saved registers */
788
789         if (cfg->method->save_lmf) {
790                 offset += sizeof (MonoLMF);
791         } else {
792                 if (cfg->used_int_regs & (1 << X86_EBX)) {
793                         offset += 4;
794                 }
795
796                 if (cfg->used_int_regs & (1 << X86_EDI)) {
797                         offset += 4;
798                 }
799
800                 if (cfg->used_int_regs & (1 << X86_ESI)) {
801                         offset += 4;
802                 }
803         }
804
805         switch (cinfo->ret.storage) {
806         case ArgValuetypeInReg:
807                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
808                 offset += 8;
809                 cfg->ret->opcode = OP_REGOFFSET;
810                 cfg->ret->inst_basereg = X86_EBP;
811                 cfg->ret->inst_offset = - offset;
812                 break;
813         default:
814                 break;
815         }
816
817         /* Allocate locals */
818         offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
819         if (locals_stack_align) {
820                 offset += (locals_stack_align - 1);
821                 offset &= ~(locals_stack_align - 1);
822         }
823         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
824                 if (offsets [i] != -1) {
825                         MonoInst *inst = cfg->varinfo [i];
826                         inst->opcode = OP_REGOFFSET;
827                         inst->inst_basereg = X86_EBP;
828                         inst->inst_offset = - (offset + offsets [i]);
829                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
830                 }
831         }
832         offset += locals_stack_size;
833
834
835         /*
836          * Allocate arguments+return value
837          */
838
839         switch (cinfo->ret.storage) {
840         case ArgOnStack:
841                 cfg->ret->opcode = OP_REGOFFSET;
842                 cfg->ret->inst_basereg = X86_EBP;
843                 cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
844                 break;
845         case ArgValuetypeInReg:
846                 break;
847         case ArgInIReg:
848                 cfg->ret->opcode = OP_REGVAR;
849                 cfg->ret->inst_c0 = cinfo->ret.reg;
850                 break;
851         case ArgNone:
852         case ArgOnFloatFpStack:
853         case ArgOnDoubleFpStack:
854                 break;
855         default:
856                 g_assert_not_reached ();
857         }
858
859         if (sig->call_convention == MONO_CALL_VARARG) {
860                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
861                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
862         }
863
864         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
865                 ArgInfo *ainfo = &cinfo->args [i];
866                 inst = cfg->args [i];
867                 if (inst->opcode != OP_REGVAR) {
868                         inst->opcode = OP_REGOFFSET;
869                         inst->inst_basereg = X86_EBP;
870                 }
871                 inst->inst_offset = ainfo->offset + ARGS_OFFSET;
872         }
873
874         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
875         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
876
877         cfg->stack_offset = offset;
878 }
879
880 void
881 mono_arch_create_vars (MonoCompile *cfg)
882 {
883         MonoMethodSignature *sig;
884         CallInfo *cinfo;
885
886         sig = mono_method_signature (cfg->method);
887
888         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
889
890         if (cinfo->ret.storage == ArgValuetypeInReg)
891                 cfg->ret_var_is_local = TRUE;
892 }
893
/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently the alignment in mono_arch_call_opcode is computed without using
 * arch_get_argument_info.
 */
897
898 static void
899 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
900 {
901         MonoInst *arg;
902         MonoMethodSignature *tmp_sig;
903         MonoInst *sig_arg;
904
905         /* FIXME: Add support for signature tokens to AOT */
906         cfg->disable_aot = TRUE;
907         MONO_INST_NEW (cfg, arg, OP_OUTARG);
908
909         /*
910          * mono_ArgIterator_Setup assumes the signature cookie is 
911          * passed first and all the arguments which were before it are
912          * passed on the stack after the signature. So compensate by 
913          * passing a different signature.
914          */
915         tmp_sig = mono_metadata_signature_dup (call->signature);
916         tmp_sig->param_count -= call->signature->sentinelpos;
917         tmp_sig->sentinelpos = 0;
918         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
919
920         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
921         sig_arg->inst_p0 = tmp_sig;
922
923         arg->inst_left = sig_arg;
924         arg->type = STACK_PTR;
925         /* prepend, so they get reversed */
926         arg->next = call->out_args;
927         call->out_args = arg;
928 }
929
930 /*
931  * It is expensive to adjust esp for each individual fp argument pushed on the stack
932  * so we try to do it just once when we have multiple fp arguments in a row.
933  * We don't use this mechanism generally because for int arguments the generated code
934  * is slightly bigger and new generation cpus optimize away the dependency chains
935  * created by push instructions on the esp value.
936  * fp_arg_setup is the first argument in the execution sequence where the esp register
937  * is modified.
938  */
939 static int
940 collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
941 {
942         int fp_space = 0;
943         MonoType *t;
944
945         for (; start_arg < sig->param_count; ++start_arg) {
946                 t = mono_type_get_underlying_type (sig->params [start_arg]);
947                 if (!t->byref && t->type == MONO_TYPE_R8) {
948                         fp_space += sizeof (double);
949                         *fp_arg_setup = start_arg;
950                 } else {
951                         break;
952                 }
953         }
954         return fp_space;
955 }
956
957 /* 
958  * take the arguments and generate the arch-specific
959  * instructions to properly call the function in call.
960  * This includes pushing, moving arguments to the right register
961  * etc.
962  */
963 MonoCallInst*
964 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
965         MonoInst *arg, *in;
966         MonoMethodSignature *sig;
967         int i, n;
968         CallInfo *cinfo;
969         int sentinelpos = 0;
970         int fp_args_space = 0, fp_args_offset = 0, fp_arg_setup = -1;
971
972         sig = call->signature;
973         n = sig->param_count + sig->hasthis;
974
975         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
976
977         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
978                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
979
980         for (i = 0; i < n; ++i) {
981                 ArgInfo *ainfo = cinfo->args + i;
982
983                 /* Emit the signature cookie just before the implicit arguments */
984                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
985                         emit_sig_cookie (cfg, call);
986                 }
987
988                 if (is_virtual && i == 0) {
989                         /* the argument will be attached to the call instrucion */
990                         in = call->args [i];
991                 } else {
992                         MonoType *t;
993
994                         if (i >= sig->hasthis)
995                                 t = sig->params [i - sig->hasthis];
996                         else
997                                 t = &mono_defaults.int_class->byval_arg;
998                         t = mono_type_get_underlying_type (t);
999
1000                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1001                         in = call->args [i];
1002                         arg->cil_code = in->cil_code;
1003                         arg->inst_left = in;
1004                         arg->type = in->type;
1005                         /* prepend, so they get reversed */
1006                         arg->next = call->out_args;
1007                         call->out_args = arg;
1008
1009                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
1010                                 guint32 size, align;
1011
1012                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
1013                                         size = sizeof (MonoTypedRef);
1014                                         align = sizeof (gpointer);
1015                                 }
1016                                 else
1017                                         if (sig->pinvoke)
1018                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
1019                                         else {
1020                                                 int ialign;
1021                                                 size = mono_type_stack_size (&in->klass->byval_arg, &ialign);
1022                                                 align = ialign;
1023                                         }
1024                                 arg->opcode = OP_OUTARG_VT;
1025                                 arg->klass = in->klass;
1026                                 arg->backend.is_pinvoke = sig->pinvoke;
1027                                 arg->inst_imm = size; 
1028                         }
1029                         else {
1030                                 switch (ainfo->storage) {
1031                                 case ArgOnStack:
1032                                         arg->opcode = OP_OUTARG;
1033                                         if (!t->byref) {
1034                                                 if (t->type == MONO_TYPE_R4) {
1035                                                         arg->opcode = OP_OUTARG_R4;
1036                                                 } else if (t->type == MONO_TYPE_R8) {
1037                                                         arg->opcode = OP_OUTARG_R8;
1038                                                         /* we store in the upper bits of backen.arg_info the needed
1039                                                          * esp adjustment and in the lower bits the offset from esp
1040                                                          * where the arg needs to be stored
1041                                                          */
1042                                                         if (!fp_args_space) {
1043                                                                 fp_args_space = collect_fp_stack_space (sig, i - sig->hasthis, &fp_arg_setup);
1044                                                                 fp_args_offset = fp_args_space;
1045                                                         }
1046                                                         arg->backend.arg_info = fp_args_space - fp_args_offset;
1047                                                         fp_args_offset -= sizeof (double);
1048                                                         if (i - sig->hasthis == fp_arg_setup) {
1049                                                                 arg->backend.arg_info |= fp_args_space << 16;
1050                                                         }
1051                                                         if (fp_args_offset == 0) {
1052                                                                 /* the allocated esp stack is finished:
1053                                                                  * prepare for an eventual second run of fp args
1054                                                                  */
1055                                                                 fp_args_space = 0;
1056                                                         }
1057                                                 }
1058                                         }
1059                                         break;
1060                                 default:
1061                                         g_assert_not_reached ();
1062                                 }
1063                         }
1064                 }
1065         }
1066
1067         /* Handle the case where there are no implicit arguments */
1068         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
1069                 emit_sig_cookie (cfg, call);
1070         }
1071
1072         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1073                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1074                         MonoInst *zero_inst;
1075                         /*
1076                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1077                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1078                          * before calling the function. So we add a dummy instruction to represent pushing the 
1079                          * struct return address to the stack. The return address will be saved to this stack slot 
1080                          * by the code emitted in this_vret_args.
1081                          */
1082                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1083                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1084                         zero_inst->inst_p0 = 0;
1085                         arg->inst_left = zero_inst;
1086                         arg->type = STACK_PTR;
1087                         /* prepend, so they get reversed */
1088                         arg->next = call->out_args;
1089                         call->out_args = arg;
1090                 }
1091                 else
1092                         /* if the function returns a struct, the called method already does a ret $0x4 */
1093                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1094                                 cinfo->stack_usage -= 4;
1095         }
1096         
1097         call->stack_usage = cinfo->stack_usage;
1098
1099 #if defined(__APPLE__)
1100         if (cinfo->need_stack_align) {
1101                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1102                 arg->inst_c0 = cinfo->stack_align_amount;
1103                 arg->next = call->out_args;
1104                 call->out_args = arg;
1105         }
1106 #endif 
1107
1108         return call;
1109 }
1110
1111 /*
1112  * Allow tracing to work with this interface (with an optional argument)
1113  */
1114 void*
1115 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1116 {
1117         guchar *code = p;
1118
1119 #if __APPLE__
1120         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1121 #endif
1122
1123         /* if some args are passed in registers, we need to save them here */
1124         x86_push_reg (code, X86_EBP);
1125
1126         if (cfg->compile_aot) {
1127                 x86_push_imm (code, cfg->method);
1128                 x86_mov_reg_imm (code, X86_EAX, func);
1129                 x86_call_reg (code, X86_EAX);
1130         } else {
1131                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1132                 x86_push_imm (code, cfg->method);
1133                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1134                 x86_call_code (code, 0);
1135         }
1136 #if __APPLE__
1137         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
1138 #else
1139         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1140 #endif
1141
1142         return code;
1143 }
1144
/*
 * Ways in which mono_arch_instrument_epilog () preserves the return value
 * of the instrumented method around its call to the tracing function.
 */
enum {
	SAVE_NONE = 0,		/* nothing is saved */
	SAVE_STRUCT,		/* nothing is saved; [EBP + 8] is pushed as argument when arguments are enabled */
	SAVE_EAX,		/* EAX is pushed before and popped after the call */
	SAVE_EAX_EDX,		/* EDX and EAX are pushed before and popped after the call */
	SAVE_FP			/* the fp value is stored to the stack before and reloaded after the call */
};
1152
1153 void*
1154 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1155 {
1156         guchar *code = p;
1157         int arg_size = 0, save_mode = SAVE_NONE;
1158         MonoMethod *method = cfg->method;
1159         
1160         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1161         case MONO_TYPE_VOID:
1162                 /* special case string .ctor icall */
1163                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1164                         save_mode = SAVE_EAX;
1165                 else
1166                         save_mode = SAVE_NONE;
1167                 break;
1168         case MONO_TYPE_I8:
1169         case MONO_TYPE_U8:
1170                 save_mode = SAVE_EAX_EDX;
1171                 break;
1172         case MONO_TYPE_R4:
1173         case MONO_TYPE_R8:
1174                 save_mode = SAVE_FP;
1175                 break;
1176         case MONO_TYPE_GENERICINST:
1177                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1178                         save_mode = SAVE_EAX;
1179                         break;
1180                 }
1181                 /* Fall through */
1182         case MONO_TYPE_VALUETYPE:
1183                 save_mode = SAVE_STRUCT;
1184                 break;
1185         default:
1186                 save_mode = SAVE_EAX;
1187                 break;
1188         }
1189
1190         switch (save_mode) {
1191         case SAVE_EAX_EDX:
1192                 x86_push_reg (code, X86_EDX);
1193                 x86_push_reg (code, X86_EAX);
1194                 if (enable_arguments) {
1195                         x86_push_reg (code, X86_EDX);
1196                         x86_push_reg (code, X86_EAX);
1197                         arg_size = 8;
1198                 }
1199                 break;
1200         case SAVE_EAX:
1201                 x86_push_reg (code, X86_EAX);
1202                 if (enable_arguments) {
1203                         x86_push_reg (code, X86_EAX);
1204                         arg_size = 4;
1205                 }
1206                 break;
1207         case SAVE_FP:
1208                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1209                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1210                 if (enable_arguments) {
1211                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1212                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1213                         arg_size = 8;
1214                 }
1215                 break;
1216         case SAVE_STRUCT:
1217                 if (enable_arguments) {
1218                         x86_push_membase (code, X86_EBP, 8);
1219                         arg_size = 4;
1220                 }
1221                 break;
1222         case SAVE_NONE:
1223         default:
1224                 break;
1225         }
1226
1227         if (cfg->compile_aot) {
1228                 x86_push_imm (code, method);
1229                 x86_mov_reg_imm (code, X86_EAX, func);
1230                 x86_call_reg (code, X86_EAX);
1231         } else {
1232                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1233                 x86_push_imm (code, method);
1234                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1235                 x86_call_code (code, 0);
1236         }
1237         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1238
1239         switch (save_mode) {
1240         case SAVE_EAX_EDX:
1241                 x86_pop_reg (code, X86_EAX);
1242                 x86_pop_reg (code, X86_EDX);
1243                 break;
1244         case SAVE_EAX:
1245                 x86_pop_reg (code, X86_EAX);
1246                 break;
1247         case SAVE_FP:
1248                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1249                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1250                 break;
1251         case SAVE_NONE:
1252         default:
1253                 break;
1254         }
1255
1256         return code;
1257 }
1258
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch with condition COND/SIGN to the target of INS.
 * If INS has MONO_INST_BRLABEL set the target is the label in ins->inst_i0,
 * otherwise it is the basic block ins->inst_true_bb. When the native offset
 * of the target is already set (non zero) the branch is emitted directly to
 * it. Otherwise a patch is registered and a branch with a zero displacement
 * is emitted: the 8 bit form if MONO_OPT_BRANCH is enabled and the estimated
 * distance fits in an imm8, the 32 bit form otherwise.
 *
 *   The macro expands to a bare if/else statement and relies on the
 * variables `cfg', `code' and `cpos' being in scope at the expansion site.
 * INS is parenthesized at every use so that any pointer expression can be
 * passed safely.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if ((ins)->flags & MONO_INST_BRLABEL) { \
	if ((ins)->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, (ins)->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 ((ins)->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if ((ins)->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, (ins)->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 ((ins)->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}
1283
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit code raising the exception EXC_NAME when the condition
 * COND/IS_SIGNED holds. If mono_branch_optimize_exception_target () finds a
 * target for it, branch there directly with EMIT_COND_BRANCH; otherwise a
 * MONO_PATCH_INFO_EXC patch is registered and a 32 bit branch is emitted.
 *
 *   Relies on `cfg', `bb' and `code' (plus whatever EMIT_COND_BRANCH needs)
 * being in scope at the expansion site.
 *
 *   NOTE(review): the expansion already ends in a semicolon, so
 * `if (x) EMIT_COND_SYSTEM_EXCEPTION (...); else ...' does not compile.
 * It is kept since the call sites are not all visible from here.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,is_signed,exc_name) \
	do { \
		MonoInst *exc_target = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (exc_target != NULL) { \
			EMIT_COND_BRANCH (exc_target, cond, is_signed); \
		} else { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, is_signed); \
		} \
	} while (0); 
1299
/*
 * EMIT_FPCOMPARE:
 *
 *   Emit at CODE an x86_fcompp followed by an x86_fnstsw.
 * The expansion already ends in a semicolon.
 */
#define EMIT_FPCOMPARE(code) \
	do { \
		x86_fcompp (code); \
		x86_fnstsw (code); \
	} while (0); 
1304
1305
1306 static guint8*
1307 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1308 {
1309         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1310         x86_call_code (code, 0);
1311
1312         return code;
1313 }
1314
/*
 * INST_IGNORES_CFLAGS:
 *
 *   Evaluates to true unless OPCODE is one of the add-with-carry /
 * subtract-with-borrow opcodes (ADC, SBB and their I and _IMM variants).
 * OPCODE is evaluated several times.
 */
#define INST_IGNORES_CFLAGS(opcode) \
	(((opcode) != OP_ADC) && ((opcode) != OP_IADC) && \
	 ((opcode) != OP_ADC_IMM) && ((opcode) != OP_IADC_IMM) && \
	 ((opcode) != OP_SBB) && ((opcode) != OP_ISBB) && \
	 ((opcode) != OP_SBB_IMM) && ((opcode) != OP_ISBB_IMM))
1316
1317 /*
1318  * peephole_pass_1:
1319  *
1320  *   Perform peephole opts which should/can be performed before local regalloc
1321  */
1322 static void
1323 peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
1324 {
1325         MonoInst *ins, *last_ins = NULL;
1326         ins = bb->code;
1327
1328         while (ins) {
1329                 switch (ins->opcode) {
1330                 case OP_IADD_IMM:
1331                 case OP_ADD_IMM:
1332                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1333                                 /* 
1334                                  * X86_LEA is like ADD, but doesn't have the
1335                                  * sreg1==dreg restriction.
1336                                  */
1337                                 ins->opcode = OP_X86_LEA_MEMBASE;
1338                                 ins->inst_basereg = ins->sreg1;
1339                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1340                                 ins->opcode = OP_X86_INC_REG;
1341                         break;
1342                 case OP_SUB_IMM:
1343                 case OP_ISUB_IMM:
1344                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1345                                 ins->opcode = OP_X86_LEA_MEMBASE;
1346                                 ins->inst_basereg = ins->sreg1;
1347                                 ins->inst_imm = -ins->inst_imm;
1348                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1349                                 ins->opcode = OP_X86_DEC_REG;
1350                         break;
1351                 case OP_COMPARE_IMM:
1352                 case OP_ICOMPARE_IMM:
1353                         /* OP_COMPARE_IMM (reg, 0) 
1354                          * --> 
1355                          * OP_X86_TEST_NULL (reg) 
1356                          */
1357                         if (!ins->inst_imm)
1358                                 ins->opcode = OP_X86_TEST_NULL;
1359                         break;
1360                 case OP_X86_COMPARE_MEMBASE_IMM:
1361                         /* 
1362                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1363                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1364                          * -->
1365                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1366                          * OP_COMPARE_IMM reg, imm
1367                          *
1368                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1369                          */
1370                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1371                             ins->inst_basereg == last_ins->inst_destbasereg &&
1372                             ins->inst_offset == last_ins->inst_offset) {
1373                                         ins->opcode = OP_COMPARE_IMM;
1374                                         ins->sreg1 = last_ins->sreg1;
1375
1376                                         /* check if we can remove cmp reg,0 with test null */
1377                                         if (!ins->inst_imm)
1378                                                 ins->opcode = OP_X86_TEST_NULL;
1379                                 }
1380
1381                         break;
1382                 case OP_LOAD_MEMBASE:
1383                 case OP_LOADI4_MEMBASE:
1384                         /* 
1385                          * Note: if reg1 = reg2 the load op is removed
1386                          *
1387                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1388                          * OP_LOAD_MEMBASE offset(basereg), reg2
1389                          * -->
1390                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1391                          * OP_MOVE reg1, reg2
1392                          */
1393                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1394                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1395                             ins->inst_basereg == last_ins->inst_destbasereg &&
1396                             ins->inst_offset == last_ins->inst_offset) {
1397                                 if (ins->dreg == last_ins->sreg1) {
1398                                         last_ins->next = ins->next;                             
1399                                         ins = ins->next;                                
1400                                         continue;
1401                                 } else {
1402                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1403                                         ins->opcode = OP_MOVE;
1404                                         ins->sreg1 = last_ins->sreg1;
1405                                 }
1406
1407                         /* 
1408                          * Note: reg1 must be different from the basereg in the second load
1409                          * Note: if reg1 = reg2 is equal then second load is removed
1410                          *
1411                          * OP_LOAD_MEMBASE offset(basereg), reg1
1412                          * OP_LOAD_MEMBASE offset(basereg), reg2
1413                          * -->
1414                          * OP_LOAD_MEMBASE offset(basereg), reg1
1415                          * OP_MOVE reg1, reg2
1416                          */
1417                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1418                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1419                               ins->inst_basereg != last_ins->dreg &&
1420                               ins->inst_basereg == last_ins->inst_basereg &&
1421                               ins->inst_offset == last_ins->inst_offset) {
1422
1423                                 if (ins->dreg == last_ins->dreg) {
1424                                         last_ins->next = ins->next;                             
1425                                         ins = ins->next;                                
1426                                         continue;
1427                                 } else {
1428                                         ins->opcode = OP_MOVE;
1429                                         ins->sreg1 = last_ins->dreg;
1430                                 }
1431
1432                                 //g_assert_not_reached ();
1433
1434 #if 0
1435                         /* 
1436                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1437                          * OP_LOAD_MEMBASE offset(basereg), reg
1438                          * -->
1439                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1440                          * OP_ICONST reg, imm
1441                          */
1442                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1443                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1444                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1445                                    ins->inst_offset == last_ins->inst_offset) {
1446                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1447                                 ins->opcode = OP_ICONST;
1448                                 ins->inst_c0 = last_ins->inst_imm;
1449                                 g_assert_not_reached (); // check this rule
1450 #endif
1451                         }
1452                         break;
1453                 case OP_LOADU1_MEMBASE:
1454                 case OP_LOADI1_MEMBASE:
1455                         /* 
1456                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1457                          * OP_LOAD_MEMBASE offset(basereg), reg2
1458                          * -->
1459                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1460                          * CONV_I2/U2 reg1, reg2
1461                          */
1462                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1463                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1464                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1465                                         ins->inst_offset == last_ins->inst_offset) {
1466                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1467                                 ins->sreg1 = last_ins->sreg1;
1468                         }
1469                         break;
1470                 case OP_LOADU2_MEMBASE:
1471                 case OP_LOADI2_MEMBASE:
1472                         /* 
1473                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1474                          * OP_LOAD_MEMBASE offset(basereg), reg2
1475                          * -->
1476                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1477                          * CONV_I2/U2 reg1, reg2
1478                          */
1479                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1480                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1481                                         ins->inst_offset == last_ins->inst_offset) {
1482                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1483                                 ins->sreg1 = last_ins->sreg1;
1484                         }
1485                         break;
1486                 case CEE_CONV_I4:
1487                 case CEE_CONV_U4:
1488                 case OP_ICONV_TO_I4:
1489                 case OP_MOVE:
1490                         /*
1491                          * Removes:
1492                          *
1493                          * OP_MOVE reg, reg 
1494                          */
1495                         if (ins->dreg == ins->sreg1) {
1496                                 if (last_ins)
1497                                         last_ins->next = ins->next;                             
1498                                 ins = ins->next;
1499                                 continue;
1500                         }
1501                         /* 
1502                          * Removes:
1503                          *
1504                          * OP_MOVE sreg, dreg 
1505                          * OP_MOVE dreg, sreg
1506                          */
1507                         if (last_ins && last_ins->opcode == OP_MOVE &&
1508                             ins->sreg1 == last_ins->dreg &&
1509                             ins->dreg == last_ins->sreg1) {
1510                                 last_ins->next = ins->next;                             
1511                                 ins = ins->next;                                
1512                                 continue;
1513                         }
1514                         break;
1515                         
1516                 case OP_X86_PUSH_MEMBASE:
1517                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1518                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1519                             ins->inst_basereg == last_ins->inst_destbasereg &&
1520                             ins->inst_offset == last_ins->inst_offset) {
1521                                     ins->opcode = OP_X86_PUSH;
1522                                     ins->sreg1 = last_ins->sreg1;
1523                         }
1524                         break;
1525                 }
1526                 last_ins = ins;
1527                 ins = ins->next;
1528         }
1529         bb->last_ins = last_ins;
1530 }
1531
1532 static void
1533 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1534 {
1535         MonoInst *ins, *last_ins = NULL;
1536         ins = bb->code;
1537
1538         while (ins) {
1539
1540                 switch (ins->opcode) {
1541                 case OP_ICONST:
1542                         /* reg = 0 -> XOR (reg, reg) */
1543                         /* XOR sets cflags on x86, so we cant do it always */
1544                         if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
1545                                 MonoInst *ins2;
1546
1547                                 ins->opcode = OP_IXOR;
1548                                 ins->sreg1 = ins->dreg;
1549                                 ins->sreg2 = ins->dreg;
1550
1551                                 /* 
1552                                  * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
1553                                  * since it takes 3 bytes instead of 7.
1554                                  */
1555                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
1556                                         if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1557                                                 ins2->opcode = OP_STORE_MEMBASE_REG;
1558                                                 ins2->sreg1 = ins->dreg;
1559                                         }
1560                                         else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1561                                                 ins2->opcode = OP_STOREI4_MEMBASE_REG;
1562                                                 ins2->sreg1 = ins->dreg;
1563                                         }
1564                                         else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
1565                                                 /* Continue iteration */
1566                                         }
1567                                         else
1568                                                 break;
1569                                 }
1570                         }
1571                         break;
1572                 case OP_IADD_IMM:
1573                 case OP_ADD_IMM:
1574                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1575                                 ins->opcode = OP_X86_INC_REG;
1576                         break;
1577                 case OP_ISUB_IMM:
1578                 case OP_SUB_IMM:
1579                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1580                                 ins->opcode = OP_X86_DEC_REG;
1581                         break;
1582                 case OP_X86_COMPARE_MEMBASE_IMM:
1583                         /* 
1584                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1585                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1586                          * -->
1587                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1588                          * OP_COMPARE_IMM reg, imm
1589                          *
1590                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1591                          */
1592                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1593                             ins->inst_basereg == last_ins->inst_destbasereg &&
1594                             ins->inst_offset == last_ins->inst_offset) {
1595                                         ins->opcode = OP_COMPARE_IMM;
1596                                         ins->sreg1 = last_ins->sreg1;
1597
1598                                         /* check if we can remove cmp reg,0 with test null */
1599                                         if (!ins->inst_imm)
1600                                                 ins->opcode = OP_X86_TEST_NULL;
1601                                 }
1602
1603                         break;
1604                 case OP_LOAD_MEMBASE:
1605                 case OP_LOADI4_MEMBASE:
1606                         /* 
1607                          * Note: if reg1 = reg2 the load op is removed
1608                          *
1609                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1610                          * OP_LOAD_MEMBASE offset(basereg), reg2
1611                          * -->
1612                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1613                          * OP_MOVE reg1, reg2
1614                          */
1615                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1616                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1617                             ins->inst_basereg == last_ins->inst_destbasereg &&
1618                             ins->inst_offset == last_ins->inst_offset) {
1619                                 if (ins->dreg == last_ins->sreg1) {
1620                                         last_ins->next = ins->next;                             
1621                                         ins = ins->next;                                
1622                                         continue;
1623                                 } else {
1624                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1625                                         ins->opcode = OP_MOVE;
1626                                         ins->sreg1 = last_ins->sreg1;
1627                                 }
1628
1629                         /* 
1630                          * Note: reg1 must be different from the basereg in the second load
1631                          * Note: if reg1 = reg2 is equal then second load is removed
1632                          *
1633                          * OP_LOAD_MEMBASE offset(basereg), reg1
1634                          * OP_LOAD_MEMBASE offset(basereg), reg2
1635                          * -->
1636                          * OP_LOAD_MEMBASE offset(basereg), reg1
1637                          * OP_MOVE reg1, reg2
1638                          */
1639                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1640                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1641                               ins->inst_basereg != last_ins->dreg &&
1642                               ins->inst_basereg == last_ins->inst_basereg &&
1643                               ins->inst_offset == last_ins->inst_offset) {
1644
1645                                 if (ins->dreg == last_ins->dreg) {
1646                                         last_ins->next = ins->next;                             
1647                                         ins = ins->next;                                
1648                                         continue;
1649                                 } else {
1650                                         ins->opcode = OP_MOVE;
1651                                         ins->sreg1 = last_ins->dreg;
1652                                 }
1653
1654                                 //g_assert_not_reached ();
1655
1656 #if 0
1657                         /* 
1658                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1659                          * OP_LOAD_MEMBASE offset(basereg), reg
1660                          * -->
1661                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1662                          * OP_ICONST reg, imm
1663                          */
1664                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1665                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1666                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1667                                    ins->inst_offset == last_ins->inst_offset) {
1668                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1669                                 ins->opcode = OP_ICONST;
1670                                 ins->inst_c0 = last_ins->inst_imm;
1671                                 g_assert_not_reached (); // check this rule
1672 #endif
1673                         }
1674                         break;
1675                 case OP_LOADU1_MEMBASE:
1676                 case OP_LOADI1_MEMBASE:
1677                         /* 
1678                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1679                          * OP_LOAD_MEMBASE offset(basereg), reg2
1680                          * -->
1681                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1682                          * CONV_I2/U2 reg1, reg2
1683                          */
1684                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1685                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1686                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1687                                         ins->inst_offset == last_ins->inst_offset) {
1688                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1689                                 ins->sreg1 = last_ins->sreg1;
1690                         }
1691                         break;
1692                 case OP_LOADU2_MEMBASE:
1693                 case OP_LOADI2_MEMBASE:
1694                         /* 
1695                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1696                          * OP_LOAD_MEMBASE offset(basereg), reg2
1697                          * -->
1698                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1699                          * CONV_I2/U2 reg1, reg2
1700                          */
1701                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1702                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1703                                         ins->inst_offset == last_ins->inst_offset) {
1704                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1705                                 ins->sreg1 = last_ins->sreg1;
1706                         }
1707                         break;
1708                 case CEE_CONV_I4:
1709                 case CEE_CONV_U4:
1710                 case OP_ICONV_TO_I4:
1711                 case OP_MOVE:
1712                         /*
1713                          * Removes:
1714                          *
1715                          * OP_MOVE reg, reg 
1716                          */
1717                         if (ins->dreg == ins->sreg1) {
1718                                 if (last_ins)
1719                                         last_ins->next = ins->next;                             
1720                                 ins = ins->next;
1721                                 continue;
1722                         }
1723                         /* 
1724                          * Removes:
1725                          *
1726                          * OP_MOVE sreg, dreg 
1727                          * OP_MOVE dreg, sreg
1728                          */
1729                         if (last_ins && last_ins->opcode == OP_MOVE &&
1730                             ins->sreg1 == last_ins->dreg &&
1731                             ins->dreg == last_ins->sreg1) {
1732                                 last_ins->next = ins->next;                             
1733                                 ins = ins->next;                                
1734                                 continue;
1735                         }
1736                         break;
1737                 case OP_X86_PUSH_MEMBASE:
1738                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1739                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1740                             ins->inst_basereg == last_ins->inst_destbasereg &&
1741                             ins->inst_offset == last_ins->inst_offset) {
1742                                     ins->opcode = OP_X86_PUSH;
1743                                     ins->sreg1 = last_ins->sreg1;
1744                         }
1745                         break;
1746                 }
1747                 last_ins = ins;
1748                 ins = ins->next;
1749         }
1750         bb->last_ins = last_ins;
1751 }
1752
/*
 * Table of X86_CC_... condition codes. The first two rows list the same
 * orderings (EQ or NE, then GE, GT, LE, LT); the last row holds the
 * overflow / no-overflow / carry / no-carry conditions.
 * NOTE(review): presumably indexed by a branch opcode relative to the first
 * conditional branch opcode, with the second row serving the unsigned
 * variants -- confirm against the use sites, which are outside this chunk.
 */
1753 static const int 
1754 branch_cc_table [] = {
1755         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1756         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1757         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1758 };
1759
1760 /* Maps CMP_... constants to X86_CC_... constants */
/*
 * Ten entries: EQ, NE, LE, GE, LT, GT followed by LE, GE, LT, GT again.
 * The repeated tail lines up with the FALSE entries of cc_signed_table.
 */
1761 static const int
1762 cc_table [] = {
1763         X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
1764         X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
1765 };
1766
/*
 * Same length as cc_table: TRUE for the first six entries, FALSE for the
 * last four.
 * NOTE(review): presumably tells whether the comparison at the same index of
 * cc_table is a signed one -- confirm against the use sites.
 */
1767 static const int
1768 cc_signed_table [] = {
1769         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1770         FALSE, FALSE, FALSE, FALSE
1771 };
1772
1773 void
1774 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1775 {
1776         if (cfg->opt & MONO_OPT_PEEPHOLE)
1777                 peephole_pass_1 (cfg, bb);
1778
1779         mono_local_regalloc (cfg, bb);
1780 }
1781
1782 static unsigned char*
1783 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1784 {
1785 #define XMM_TEMP_REG 0
1786         if (cfg->opt & MONO_OPT_SSE2 && size < 8) {
1787                 /* optimize by assigning a local var for this use so we avoid
1788                  * the stack manipulations */
1789                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1790                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1791                 x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
1792                 x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
1793                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1794                 if (size == 1)
1795                         x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1796                 else if (size == 2)
1797                         x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1798                 return code;
1799         }
1800         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1801         x86_fnstcw_membase(code, X86_ESP, 0);
1802         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1803         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1804         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1805         x86_fldcw_membase (code, X86_ESP, 2);
1806         if (size == 8) {
1807                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1808                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1809                 x86_pop_reg (code, dreg);
1810                 /* FIXME: need the high register 
1811                  * x86_pop_reg (code, dreg_high);
1812                  */
1813         } else {
1814                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1815                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1816                 x86_pop_reg (code, dreg);
1817         }
1818         x86_fldcw_membase (code, X86_ESP, 0);
1819         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1820
1821         if (size == 1)
1822                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1823         else if (size == 2)
1824                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1825         return code;
1826 }
1827
1828 static unsigned char*
1829 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1830 {
1831         int sreg = tree->sreg1;
1832         int need_touch = FALSE;
1833
1834 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1835         need_touch = TRUE;
1836 #endif
1837
1838         if (need_touch) {
1839                 guint8* br[5];
1840
1841                 /*
1842                  * Under Windows:
1843                  * If requested stack size is larger than one page,
1844                  * perform stack-touch operation
1845                  */
1846                 /*
1847                  * Generate stack probe code.
1848                  * Under Windows, it is necessary to allocate one page at a time,
1849                  * "touching" stack after each successful sub-allocation. This is
1850                  * because of the way stack growth is implemented - there is a
1851                  * guard page before the lowest stack page that is currently commited.
1852                  * Stack normally grows sequentially so OS traps access to the
1853                  * guard page and commits more pages when needed.
1854                  */
1855                 x86_test_reg_imm (code, sreg, ~0xFFF);
1856                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1857
1858                 br[2] = code; /* loop */
1859                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1860                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1861
1862                 /* 
1863                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
1864                  * that follows only initializes the last part of the area.
1865                  */
1866                 /* Same as the init code below with size==0x1000 */
1867                 if (tree->flags & MONO_INST_INIT) {
1868                         x86_push_reg (code, X86_EAX);
1869                         x86_push_reg (code, X86_ECX);
1870                         x86_push_reg (code, X86_EDI);
1871                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
1872                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
1873                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
1874                         x86_cld (code);
1875                         x86_prefix (code, X86_REP_PREFIX);
1876                         x86_stosl (code);
1877                         x86_pop_reg (code, X86_EDI);
1878                         x86_pop_reg (code, X86_ECX);
1879                         x86_pop_reg (code, X86_EAX);
1880                 }
1881
1882                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1883                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1884                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1885                 x86_patch (br[3], br[2]);
1886                 x86_test_reg_reg (code, sreg, sreg);
1887                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1888                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1889
1890                 br[1] = code; x86_jump8 (code, 0);
1891
1892                 x86_patch (br[0], code);
1893                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1894                 x86_patch (br[1], code);
1895                 x86_patch (br[4], code);
1896         }
1897         else
1898                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1899
1900         if (tree->flags & MONO_INST_INIT) {
1901                 int offset = 0;
1902                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1903                         x86_push_reg (code, X86_EAX);
1904                         offset += 4;
1905                 }
1906                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1907                         x86_push_reg (code, X86_ECX);
1908                         offset += 4;
1909                 }
1910                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1911                         x86_push_reg (code, X86_EDI);
1912                         offset += 4;
1913                 }
1914                 
1915                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1916                 if (sreg != X86_ECX)
1917                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1918                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1919                                 
1920                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1921                 x86_cld (code);
1922                 x86_prefix (code, X86_REP_PREFIX);
1923                 x86_stosl (code);
1924                 
1925                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1926                         x86_pop_reg (code, X86_EDI);
1927                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1928                         x86_pop_reg (code, X86_ECX);
1929                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1930                         x86_pop_reg (code, X86_EAX);
1931         }
1932         return code;
1933 }
1934
1935
1936 static guint8*
1937 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1938 {
1939         CallInfo *cinfo;
1940         int quad;
1941
1942         /* Move return value to the target register */
1943         switch (ins->opcode) {
1944         case CEE_CALL:
1945         case OP_CALL_REG:
1946         case OP_CALL_MEMBASE:
1947                 if (ins->dreg != X86_EAX)
1948                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1949                 break;
1950         case OP_VCALL:
1951         case OP_VCALL_REG:
1952         case OP_VCALL_MEMBASE:
1953                 cinfo = get_call_info (cfg, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
1954                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1955                         /* Pop the destination address from the stack */
1956                         x86_pop_reg (code, X86_ECX);
1957                         
1958                         for (quad = 0; quad < 2; quad ++) {
1959                                 switch (cinfo->ret.pair_storage [quad]) {
1960                                 case ArgInIReg:
1961                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1962                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1963                                         break;
1964                                 case ArgNone:
1965                                         break;
1966                                 default:
1967                                         g_assert_not_reached ();
1968                                 }
1969                         }
1970                 }
1971         default:
1972                 break;
1973         }
1974
1975         return code;
1976 }
1977
1978 /*
1979  * emit_tls_get:
1980  * @code: buffer to store code to
1981  * @dreg: hard register where to place the result
1982  * @tls_offset: offset info
1983  *
1984  * emit_tls_get emits in @code the native code that puts in the dreg register
1985  * the item in the thread local storage identified by tls_offset.
1986  *
1987  * Returns: a pointer to the end of the stored code
1988  */
1989 static guint8*
1990 emit_tls_get (guint8* code, int dreg, int tls_offset)
1991 {
1992 #ifdef PLATFORM_WIN32
1993         /* 
1994          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1995          * Journal and/or a disassembly of the TlsGet () function.
1996          */
1997         g_assert (tls_offset < 64);
1998         x86_prefix (code, X86_FS_PREFIX);
1999         x86_mov_reg_mem (code, dreg, 0x18, 4);
2000         /* Dunno what this does but TlsGetValue () contains it */
2001         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
2002         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
2003 #else
2004         if (optimize_for_xen) {
2005                 x86_prefix (code, X86_GS_PREFIX);
2006                 x86_mov_reg_mem (code, dreg, 0, 4);
2007                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
2008         } else {
2009                 x86_prefix (code, X86_GS_PREFIX);
2010                 x86_mov_reg_mem (code, dreg, tls_offset, 4);
2011         }
2012 #endif
2013         return code;
2014 }
2015
2016 /*
2017  * emit_load_volatile_arguments:
2018  *
2019  *  Load volatile arguments from the stack to the original input registers.
2020  * Required before a tail call.
2021  */
2022 static guint8*
2023 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2024 {
2025         MonoMethod *method = cfg->method;
2026         MonoMethodSignature *sig;
2027         MonoInst *inst;
2028         CallInfo *cinfo;
2029         guint32 i;
2030
2031         /* FIXME: Generate intermediate code instead */
2032
2033         sig = mono_method_signature (method);
2034
2035         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
2036         
2037         /* This is the opposite of the code in emit_prolog */
2038
2039         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2040                 ArgInfo *ainfo = cinfo->args + i;
2041                 MonoType *arg_type;
2042                 inst = cfg->args [i];
2043
2044                 if (sig->hasthis && (i == 0))
2045                         arg_type = &mono_defaults.object_class->byval_arg;
2046                 else
2047                         arg_type = sig->params [i - sig->hasthis];
2048
2049                 /*
2050                  * On x86, the arguments are either in their original stack locations, or in
2051                  * global regs.
2052                  */
2053                 if (inst->opcode == OP_REGVAR) {
2054                         g_assert (ainfo->storage == ArgOnStack);
2055                         
2056                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
2057                 }
2058         }
2059
2060         return code;
2061 }
2062
/*
 * REAL_PRINT_REG:
 * @text: string literal prepended to the " %d %p\n" format
 * @reg: register number; must be >= 0
 *
 * Debugging aid: emits, through the `code' variable of the enclosing
 * function, a call to printf that receives the format string, the register
 * number and the register's value at run time. EAX, EDX and ECX are saved
 * before the call and restored afterwards; the three printf arguments are
 * removed from the stack by the ADD ESP, 3*4.
 *
 * The expansion is a plain sequence of statements (not wrapped in
 * do { } while (0)), so it must not be used as the body of an unbraced
 * if/else or loop.
 */
2063 #define REAL_PRINT_REG(text,reg) \
2064 mono_assert (reg >= 0); \
2065 x86_push_reg (code, X86_EAX); \
2066 x86_push_reg (code, X86_EDX); \
2067 x86_push_reg (code, X86_ECX); \
2068 x86_push_reg (code, reg); \
2069 x86_push_imm (code, reg); \
2070 x86_push_imm (code, text " %d %p\n"); \
2071 x86_mov_reg_imm (code, X86_EAX, printf); \
2072 x86_call_reg (code, X86_EAX); \
2073 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
2074 x86_pop_reg (code, X86_ECX); \
2075 x86_pop_reg (code, X86_EDX); \
2076 x86_pop_reg (code, X86_EAX);
2077
/*
 * LOOP_ALIGNMENT is the byte alignment mono_arch_output_basic_block pads to
 * before a loop start when MONO_OPT_LOOP is enabled. bb_is_loop_start is
 * true for a basic block that has both loop_body_start and nesting set.
 */
2078 /* benchmark and set based on cpu */
2079 #define LOOP_ALIGNMENT 8
2080 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2081
2082 void
2083 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2084 {
2085         MonoInst *ins;
2086         MonoCallInst *call;
2087         guint offset;
2088         guint8 *code = cfg->native_code + cfg->code_len;
2089         MonoInst *last_ins = NULL;
2090         guint last_offset = 0;
2091         int max_len, cpos;
2092
2093         if (cfg->opt & MONO_OPT_PEEPHOLE)
2094                 peephole_pass (cfg, bb);
2095
2096         if (cfg->opt & MONO_OPT_LOOP) {
2097                 int pad, align = LOOP_ALIGNMENT;
2098                 /* set alignment depending on cpu */
2099                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2100                         pad = align - pad;
2101                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2102                         x86_padding (code, pad);
2103                         cfg->code_len += pad;
2104                         bb->native_offset = cfg->code_len;
2105                 }
2106         }
2107
2108         if (cfg->verbose_level > 2)
2109                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2110
2111         cpos = bb->max_offset;
2112
2113         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2114                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2115                 g_assert (!cfg->compile_aot);
2116                 cpos += 6;
2117
2118                 cov->data [bb->dfn].cil_code = bb->cil_code;
2119                 /* this is not thread save, but good enough */
2120                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2121         }
2122
2123         offset = code - cfg->native_code;
2124
2125         mono_debug_open_block (cfg, bb, offset);
2126
2127         ins = bb->code;
2128         while (ins) {
2129                 offset = code - cfg->native_code;
2130
2131                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2132
2133                 if (offset > (cfg->code_size - max_len - 16)) {
2134                         cfg->code_size *= 2;
2135                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2136                         code = cfg->native_code + offset;
2137                         mono_jit_stats.code_reallocs++;
2138                 }
2139
2140                 mono_debug_record_line_number (cfg, ins, offset);
2141
2142                 switch (ins->opcode) {
2143                 case OP_BIGMUL:
2144                         x86_mul_reg (code, ins->sreg2, TRUE);
2145                         break;
2146                 case OP_BIGMUL_UN:
2147                         x86_mul_reg (code, ins->sreg2, FALSE);
2148                         break;
2149                 case OP_X86_SETEQ_MEMBASE:
2150                 case OP_X86_SETNE_MEMBASE:
2151                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2152                                          ins->inst_basereg, ins->inst_offset, TRUE);
2153                         break;
2154                 case OP_STOREI1_MEMBASE_IMM:
2155                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2156                         break;
2157                 case OP_STOREI2_MEMBASE_IMM:
2158                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2159                         break;
2160                 case OP_STORE_MEMBASE_IMM:
2161                 case OP_STOREI4_MEMBASE_IMM:
2162                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2163                         break;
2164                 case OP_STOREI1_MEMBASE_REG:
2165                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2166                         break;
2167                 case OP_STOREI2_MEMBASE_REG:
2168                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2169                         break;
2170                 case OP_STORE_MEMBASE_REG:
2171                 case OP_STOREI4_MEMBASE_REG:
2172                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2173                         break;
2174                 case CEE_LDIND_I:
2175                 case CEE_LDIND_I4:
2176                 case CEE_LDIND_U4:
2177                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2178                         break;
2179                 case OP_LOADU4_MEM:
2180                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2181                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2182                         break;
2183                 case OP_LOAD_MEMBASE:
2184                 case OP_LOADI4_MEMBASE:
2185                 case OP_LOADU4_MEMBASE:
2186                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2187                         break;
2188                 case OP_LOADU1_MEMBASE:
2189                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2190                         break;
2191                 case OP_LOADI1_MEMBASE:
2192                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2193                         break;
2194                 case OP_LOADU2_MEMBASE:
2195                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2196                         break;
2197                 case OP_LOADI2_MEMBASE:
2198                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2199                         break;
2200                 case CEE_CONV_I1:
2201                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2202                         break;
2203                 case CEE_CONV_I2:
2204                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2205                         break;
2206                 case CEE_CONV_U1:
2207                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2208                         break;
2209                 case CEE_CONV_U2:
2210                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2211                         break;
2212                 case OP_COMPARE:
2213                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2214                         break;
2215                 case OP_COMPARE_IMM:
2216                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2217                         break;
2218                 case OP_X86_COMPARE_MEMBASE_REG:
2219                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2220                         break;
2221                 case OP_X86_COMPARE_MEMBASE_IMM:
2222                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2223                         break;
2224                 case OP_X86_COMPARE_MEMBASE8_IMM:
2225                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2226                         break;
2227                 case OP_X86_COMPARE_REG_MEMBASE:
2228                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2229                         break;
2230                 case OP_X86_COMPARE_MEM_IMM:
2231                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2232                         break;
2233                 case OP_X86_TEST_NULL:
2234                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2235                         break;
2236                 case OP_X86_ADD_MEMBASE_IMM:
2237                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2238                         break;
2239                 case OP_X86_ADD_MEMBASE:
2240                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2241                         break;
2242                 case OP_X86_SUB_MEMBASE_IMM:
2243                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2244                         break;
2245                 case OP_X86_SUB_MEMBASE:
2246                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2247                         break;
2248                 case OP_X86_AND_MEMBASE_IMM:
2249                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2250                         break;
2251                 case OP_X86_OR_MEMBASE_IMM:
2252                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2253                         break;
2254                 case OP_X86_XOR_MEMBASE_IMM:
2255                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2256                         break;
2257                 case OP_X86_INC_MEMBASE:
2258                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2259                         break;
2260                 case OP_X86_INC_REG:
2261                         x86_inc_reg (code, ins->dreg);
2262                         break;
2263                 case OP_X86_DEC_MEMBASE:
2264                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2265                         break;
2266                 case OP_X86_DEC_REG:
2267                         x86_dec_reg (code, ins->dreg);
2268                         break;
2269                 case OP_X86_MUL_MEMBASE:
2270                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2271                         break;
2272                 case OP_BREAK:
2273                         x86_breakpoint (code);
2274                         break;
2275                 case OP_ADDCC:
2276                 case CEE_ADD:
2277                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2278                         break;
2279                 case OP_ADC:
2280                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2281                         break;
2282                 case OP_ADDCC_IMM:
2283                 case OP_ADD_IMM:
2284                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2285                         break;
2286                 case OP_ADC_IMM:
2287                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2288                         break;
2289                 case OP_SUBCC:
2290                 case CEE_SUB:
2291                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2292                         break;
2293                 case OP_SBB:
2294                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2295                         break;
2296                 case OP_SUBCC_IMM:
2297                 case OP_SUB_IMM:
2298                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2299                         break;
2300                 case OP_SBB_IMM:
2301                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2302                         break;
2303                 case CEE_AND:
2304                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2305                         break;
2306                 case OP_AND_IMM:
2307                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2308                         break;
2309                 case CEE_DIV:
2310                         x86_cdq (code);
2311                         x86_div_reg (code, ins->sreg2, TRUE);
2312                         break;
2313                 case CEE_DIV_UN:
2314                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2315                         x86_div_reg (code, ins->sreg2, FALSE);
2316                         break;
2317                 case OP_DIV_IMM:
2318                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2319                         x86_cdq (code);
2320                         x86_div_reg (code, ins->sreg2, TRUE);
2321                         break;
2322                 case CEE_REM:
2323                         x86_cdq (code);
2324                         x86_div_reg (code, ins->sreg2, TRUE);
2325                         break;
2326                 case CEE_REM_UN:
2327                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2328                         x86_div_reg (code, ins->sreg2, FALSE);
2329                         break;
2330                 case OP_REM_IMM:
2331                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2332                         x86_cdq (code);
2333                         x86_div_reg (code, ins->sreg2, TRUE);
2334                         break;
2335                 case CEE_OR:
2336                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2337                         break;
2338                 case OP_OR_IMM:
2339                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2340                         break;
2341                 case CEE_XOR:
2342                 case OP_IXOR:
2343                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2344                         break;
2345                 case OP_XOR_IMM:
2346                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2347                         break;
2348                 case CEE_SHL:
2349                         g_assert (ins->sreg2 == X86_ECX);
2350                         x86_shift_reg (code, X86_SHL, ins->dreg);
2351                         break;
2352                 case CEE_SHR:
2353                         g_assert (ins->sreg2 == X86_ECX);
2354                         x86_shift_reg (code, X86_SAR, ins->dreg);
2355                         break;
2356                 case OP_SHR_IMM:
2357                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2358                         break;
2359                 case OP_SHR_UN_IMM:
2360                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2361                         break;
2362                 case CEE_SHR_UN:
2363                         g_assert (ins->sreg2 == X86_ECX);
2364                         x86_shift_reg (code, X86_SHR, ins->dreg);
2365                         break;
2366                 case OP_SHL_IMM:
2367                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2368                         break;
2369                 case OP_LSHL: {
2370                         guint8 *jump_to_end;
2371
2372                         /* handle shifts below 32 bits */
2373                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2374                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2375
2376                         x86_test_reg_imm (code, X86_ECX, 32);
2377                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2378
2379                         /* handle shifts over 31 bits */
2380                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2381                         x86_clear_reg (code, ins->sreg1);
2382                         
2383                         x86_patch (jump_to_end, code);
2384                         }
2385                         break;
2386                 case OP_LSHR: {
2387                         guint8 *jump_to_end;
2388
2389                         /* handle shifts below 32 bits */
2390                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2391                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2392
2393                         x86_test_reg_imm (code, X86_ECX, 32);
2394                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2395
2396                         /* handle shifts over 31 bits */
2397                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2398                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2399                         
2400                         x86_patch (jump_to_end, code);
2401                         }
2402                         break;
2403                 case OP_LSHR_UN: {
2404                         guint8 *jump_to_end;
2405
2406                         /* handle shifts below 32 bits */
2407                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2408                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2409
2410                         x86_test_reg_imm (code, X86_ECX, 32);
2411                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2412
2413                         /* handle shifts over 31 bits */
2414                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2415                         x86_clear_reg (code, ins->backend.reg3);
2416                         
2417                         x86_patch (jump_to_end, code);
2418                         }
2419                         break;
2420                 case OP_LSHL_IMM:
2421                         if (ins->inst_imm >= 32) {
2422                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2423                                 x86_clear_reg (code, ins->sreg1);
2424                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2425                         } else {
2426                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2427                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2428                         }
2429                         break;
2430                 case OP_LSHR_IMM:
2431                         if (ins->inst_imm >= 32) {
2432                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2433                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2434                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2435                         } else {
2436                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2437                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2438                         }
2439                         break;
2440                 case OP_LSHR_UN_IMM:
2441                         if (ins->inst_imm >= 32) {
2442                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2443                                 x86_clear_reg (code, ins->backend.reg3);
2444                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2445                         } else {
2446                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2447                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2448                         }
2449                         break;
2450                 case CEE_NOT:
2451                         x86_not_reg (code, ins->sreg1);
2452                         break;
2453                 case CEE_NEG:
2454                         x86_neg_reg (code, ins->sreg1);
2455                         break;
2456                 case OP_SEXT_I1:
2457                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2458                         break;
2459                 case OP_SEXT_I2:
2460                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2461                         break;
2462                 case CEE_MUL:
2463                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2464                         break;
2465                 case OP_MUL_IMM:
2466                         switch (ins->inst_imm) {
2467                         case 2:
2468                                 /* MOV r1, r2 */
2469                                 /* ADD r1, r1 */
2470                                 if (ins->dreg != ins->sreg1)
2471                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2472                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2473                                 break;
2474                         case 3:
2475                                 /* LEA r1, [r2 + r2*2] */
2476                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2477                                 break;
2478                         case 5:
2479                                 /* LEA r1, [r2 + r2*4] */
2480                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2481                                 break;
2482                         case 6:
2483                                 /* LEA r1, [r2 + r2*2] */
2484                                 /* ADD r1, r1          */
2485                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2486                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2487                                 break;
2488                         case 9:
2489                                 /* LEA r1, [r2 + r2*8] */
2490                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2491                                 break;
2492                         case 10:
2493                                 /* LEA r1, [r2 + r2*4] */
2494                                 /* ADD r1, r1          */
2495                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2496                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2497                                 break;
2498                         case 12:
2499                                 /* LEA r1, [r2 + r2*2] */
2500                                 /* SHL r1, 2           */
2501                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2502                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2503                                 break;
2504                         case 25:
2505                                 /* LEA r1, [r2 + r2*4] */
2506                                 /* LEA r1, [r1 + r1*4] */
2507                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2508                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2509                                 break;
2510                         case 100:
2511                                 /* LEA r1, [r2 + r2*4] */
2512                                 /* SHL r1, 2           */
2513                                 /* LEA r1, [r1 + r1*4] */
2514                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2515                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2516                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2517                                 break;
2518                         default:
2519                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2520                                 break;
2521                         }
2522                         break;
2523                 case CEE_MUL_OVF:
2524                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2525                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2526                         break;
2527                 case CEE_MUL_OVF_UN: {
2528                         /* the mul operation and the exception check should most likely be split */
2529                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2530                         /*g_assert (ins->sreg2 == X86_EAX);
2531                         g_assert (ins->dreg == X86_EAX);*/
2532                         if (ins->sreg2 == X86_EAX) {
2533                                 non_eax_reg = ins->sreg1;
2534                         } else if (ins->sreg1 == X86_EAX) {
2535                                 non_eax_reg = ins->sreg2;
2536                         } else {
2537                                 /* no need to save since we're going to store to it anyway */
2538                                 if (ins->dreg != X86_EAX) {
2539                                         saved_eax = TRUE;
2540                                         x86_push_reg (code, X86_EAX);
2541                                 }
2542                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2543                                 non_eax_reg = ins->sreg2;
2544                         }
2545                         if (ins->dreg == X86_EDX) {
2546                                 if (!saved_eax) {
2547                                         saved_eax = TRUE;
2548                                         x86_push_reg (code, X86_EAX);
2549                                 }
2550                         } else if (ins->dreg != X86_EAX) {
2551                                 saved_edx = TRUE;
2552                                 x86_push_reg (code, X86_EDX);
2553                         }
2554                         x86_mul_reg (code, non_eax_reg, FALSE);
2555                         /* save before the check since pop and mov don't change the flags */
2556                         if (ins->dreg != X86_EAX)
2557                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2558                         if (saved_edx)
2559                                 x86_pop_reg (code, X86_EDX);
2560                         if (saved_eax)
2561                                 x86_pop_reg (code, X86_EAX);
2562                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2563                         break;
2564                 }
2565                 case OP_ICONST:
2566                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2567                         break;
2568                 case OP_AOTCONST:
2569                         g_assert_not_reached ();
2570                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2571                         x86_mov_reg_imm (code, ins->dreg, 0);
2572                         break;
2573                 case OP_LOAD_GOTADDR:
2574                         x86_call_imm (code, 0);
2575                         /* 
2576                          * The patch needs to point to the pop, since the GOT offset needs 
2577                          * to be added to that address.
2578                          */
2579                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2580                         x86_pop_reg (code, ins->dreg);
2581                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2582                         break;
2583                 case OP_GOT_ENTRY:
2584                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2585                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2586                         break;
2587                 case OP_X86_PUSH_GOT_ENTRY:
2588                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2589                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2590                         break;
2591                 case CEE_CONV_I4:
2592                 case OP_MOVE:
2593                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2594                         break;
2595                 case CEE_CONV_U4:
2596                         g_assert_not_reached ();
2597                 case OP_JMP: {
2598                         /*
2599                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2600                          * Keep in sync with the code in emit_epilog.
2601                          */
2602                         int pos = 0;
2603
2604                         /* FIXME: no tracing support... */
2605                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2606                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2607                         /* reset offset to make max_len work */
2608                         offset = code - cfg->native_code;
2609
2610                         g_assert (!cfg->method->save_lmf);
2611
2612                         code = emit_load_volatile_arguments (cfg, code);
2613
2614                         if (cfg->used_int_regs & (1 << X86_EBX))
2615                                 pos -= 4;
2616                         if (cfg->used_int_regs & (1 << X86_EDI))
2617                                 pos -= 4;
2618                         if (cfg->used_int_regs & (1 << X86_ESI))
2619                                 pos -= 4;
2620                         if (pos)
2621                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2622         
2623                         if (cfg->used_int_regs & (1 << X86_ESI))
2624                                 x86_pop_reg (code, X86_ESI);
2625                         if (cfg->used_int_regs & (1 << X86_EDI))
2626                                 x86_pop_reg (code, X86_EDI);
2627                         if (cfg->used_int_regs & (1 << X86_EBX))
2628                                 x86_pop_reg (code, X86_EBX);
2629         
2630                         /* restore ESP/EBP */
2631                         x86_leave (code);
2632                         offset = code - cfg->native_code;
2633                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2634                         x86_jump32 (code, 0);
2635                         break;
2636                 }
2637                 case OP_CHECK_THIS:
2638                         /* ensure ins->sreg1 is not NULL
2639                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2640                          * cmp DWORD PTR [eax], 0
2641                          */
2642                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2643                         break;
2644                 case OP_ARGLIST: {
2645                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2646                         x86_push_reg (code, hreg);
2647                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2648                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2649                         x86_pop_reg (code, hreg);
2650                         break;
2651                 }
2652                 case OP_FCALL:
2653                 case OP_LCALL:
2654                 case OP_VCALL:
2655                 case OP_VOIDCALL:
2656                 case CEE_CALL:
2657                         call = (MonoCallInst*)ins;
2658                         if (ins->flags & MONO_INST_HAS_METHOD)
2659                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2660                         else
2661                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2662                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2663                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2664                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2665                                  * for P4 or i686 because gcc will avoid using pop push at all). But we aren't
2666                                  * smart enough to do that optimization yet.
2667                                  *
2668                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2669                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2670                                  * speedup (most likely from locality benefits). People with other processors should
2671                                  * check on theirs to see what happens.
2672                                  */
2673                                 if (call->stack_usage == 4) {
2674                                         /* we want to use registers that won't get used soon, so use
2675                                          * ecx, as eax will get allocated first. edx is used by long calls,
2676                                          * so we can't use that.
2677                                          */
2678                                         
2679                                         x86_pop_reg (code, X86_ECX);
2680                                 } else {
2681                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2682                                 }
2683                         }
2684                         code = emit_move_return_value (cfg, ins, code);
2685                         break;
2686                 case OP_FCALL_REG:
2687                 case OP_LCALL_REG:
2688                 case OP_VCALL_REG:
2689                 case OP_VOIDCALL_REG:
2690                 case OP_CALL_REG:
2691                         call = (MonoCallInst*)ins;
2692                         x86_call_reg (code, ins->sreg1);
2693                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2694                                 if (call->stack_usage == 4)
2695                                         x86_pop_reg (code, X86_ECX);
2696                                 else
2697                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2698                         }
2699                         code = emit_move_return_value (cfg, ins, code);
2700                         break;
2701                 case OP_FCALL_MEMBASE:
2702                 case OP_LCALL_MEMBASE:
2703                 case OP_VCALL_MEMBASE:
2704                 case OP_VOIDCALL_MEMBASE:
2705                 case OP_CALL_MEMBASE:
2706                         call = (MonoCallInst*)ins;
2707                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2708                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2709                                 if (call->stack_usage == 4)
2710                                         x86_pop_reg (code, X86_ECX);
2711                                 else
2712                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2713                         }
2714                         code = emit_move_return_value (cfg, ins, code);
2715                         break;
2716                 case OP_OUTARG:
2717                 case OP_X86_PUSH:
2718                         x86_push_reg (code, ins->sreg1);
2719                         break;
2720                 case OP_X86_PUSH_IMM:
2721                         x86_push_imm (code, ins->inst_imm);
2722                         break;
2723                 case OP_X86_PUSH_MEMBASE:
2724                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2725                         break;
2726                 case OP_X86_PUSH_OBJ: 
2727                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2728                         x86_push_reg (code, X86_EDI);
2729                         x86_push_reg (code, X86_ESI);
2730                         x86_push_reg (code, X86_ECX);
2731                         if (ins->inst_offset)
2732                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2733                         else
2734                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2735                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2736                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2737                         x86_cld (code);
2738                         x86_prefix (code, X86_REP_PREFIX);
2739                         x86_movsd (code);
2740                         x86_pop_reg (code, X86_ECX);
2741                         x86_pop_reg (code, X86_ESI);
2742                         x86_pop_reg (code, X86_EDI);
2743                         break;
2744                 case OP_X86_LEA:
2745                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2746                         break;
2747                 case OP_X86_LEA_MEMBASE:
2748                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2749                         break;
2750                 case OP_X86_XCHG:
2751                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2752                         break;
2753                 case OP_LOCALLOC:
2754                         /* keep alignment */
2755                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2756                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2757                         code = mono_emit_stack_alloc (code, ins);
2758                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2759                         break;
2760                 case CEE_RET:
2761                         x86_ret (code);
2762                         break;
2763                 case OP_THROW: {
2764                         x86_push_reg (code, ins->sreg1);
2765                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2766                                                           (gpointer)"mono_arch_throw_exception");
2767                         break;
2768                 }
2769                 case OP_RETHROW: {
2770                         x86_push_reg (code, ins->sreg1);
2771                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2772                                                           (gpointer)"mono_arch_rethrow_exception");
2773                         break;
2774                 }
2775                 case OP_CALL_HANDLER: 
2776                         /* Align stack */
2777 #ifdef __APPLE__
2778                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2779 #endif
2780                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2781                         x86_call_imm (code, 0);
2782 #ifdef __APPLE__
2783                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2784 #endif
2785                         break;
2786                 case OP_LABEL:
2787                         ins->inst_c0 = code - cfg->native_code;
2788                         break;
2789                 case OP_BR:
2790                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2791                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2792                         //break;
2793                         if (ins->flags & MONO_INST_BRLABEL) {
2794                                 if (ins->inst_i0->inst_c0) {
2795                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2796                                 } else {
2797                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2798                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2799                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2800                                                 x86_jump8 (code, 0);
2801                                         else 
2802                                                 x86_jump32 (code, 0);
2803                                 }
2804                         } else {
2805                                 if (ins->inst_target_bb->native_offset) {
2806                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2807                                 } else {
2808                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2809                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2810                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2811                                                 x86_jump8 (code, 0);
2812                                         else 
2813                                                 x86_jump32 (code, 0);
2814                                 } 
2815                         }
2816                         break;
2817                 case OP_BR_REG:
2818                         x86_jump_reg (code, ins->sreg1);
2819                         break;
2820                 case OP_CEQ:
2821                 case OP_CLT:
2822                 case OP_CLT_UN:
2823                 case OP_CGT:
2824                 case OP_CGT_UN:
2825                 case OP_CNE:
2826                         x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2827                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2828                         break;
2829                 case OP_COND_EXC_EQ:
2830                 case OP_COND_EXC_NE_UN:
2831                 case OP_COND_EXC_LT:
2832                 case OP_COND_EXC_LT_UN:
2833                 case OP_COND_EXC_GT:
2834                 case OP_COND_EXC_GT_UN:
2835                 case OP_COND_EXC_GE:
2836                 case OP_COND_EXC_GE_UN:
2837                 case OP_COND_EXC_LE:
2838                 case OP_COND_EXC_LE_UN:
2839                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
2840                         break;
2841                 case OP_COND_EXC_OV:
2842                 case OP_COND_EXC_NO:
2843                 case OP_COND_EXC_C:
2844                 case OP_COND_EXC_NC:
2845                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2846                         break;
2847                 case CEE_BEQ:
2848                 case CEE_BNE_UN:
2849                 case CEE_BLT:
2850                 case CEE_BLT_UN:
2851                 case CEE_BGT:
2852                 case CEE_BGT_UN:
2853                 case CEE_BGE:
2854                 case CEE_BGE_UN:
2855                 case CEE_BLE:
2856                 case CEE_BLE_UN:
2857                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2858                         break;
2859
2860                 /* floating point opcodes */
2861                 case OP_R8CONST: {
2862                         double d = *(double *)ins->inst_p0;
2863
2864                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2865                                 x86_fldz (code);
2866                         } else if (d == 1.0) {
2867                                 x86_fld1 (code);
2868                         } else {
2869                                 if (cfg->compile_aot) {
2870                                         guint32 *val = (guint32*)&d;
2871                                         x86_push_imm (code, val [1]);
2872                                         x86_push_imm (code, val [0]);
2873                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2874                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2875                                 }
2876                                 else {
2877                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2878                                         x86_fld (code, NULL, TRUE);
2879                                 }
2880                         }
2881                         break;
2882                 }
2883                 case OP_R4CONST: {
2884                         float f = *(float *)ins->inst_p0;
2885
2886                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2887                                 x86_fldz (code);
2888                         } else if (f == 1.0) {
2889                                 x86_fld1 (code);
2890                         } else {
2891                                 if (cfg->compile_aot) {
2892                                         guint32 val = *(guint32*)&f;
2893                                         x86_push_imm (code, val);
2894                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2895                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2896                                 }
2897                                 else {
2898                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2899                                         x86_fld (code, NULL, FALSE);
2900                                 }
2901                         }
2902                         break;
2903                 }
2904                 case OP_STORER8_MEMBASE_REG:
2905                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2906                         break;
2907                 case OP_LOADR8_SPILL_MEMBASE:
2908                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2909                         x86_fxch (code, 1);
2910                         break;
2911                 case OP_LOADR8_MEMBASE:
2912                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2913                         break;
2914                 case OP_STORER4_MEMBASE_REG:
2915                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2916                         break;
2917                 case OP_LOADR4_MEMBASE:
2918                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2919                         break;
2920                 case CEE_CONV_R4: /* FIXME: change precision */
2921                 case CEE_CONV_R8:
2922                         x86_push_reg (code, ins->sreg1);
2923                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2924                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2925                         break;
2926                 case OP_X86_FP_LOAD_I8:
2927                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2928                         break;
2929                 case OP_X86_FP_LOAD_I4:
2930                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2931                         break;
2932                 case OP_FCONV_TO_I1:
2933                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2934                         break;
2935                 case OP_FCONV_TO_U1:
2936                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2937                         break;
2938                 case OP_FCONV_TO_I2:
2939                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2940                         break;
2941                 case OP_FCONV_TO_U2:
2942                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2943                         break;
2944                 case OP_FCONV_TO_I4:
2945                 case OP_FCONV_TO_I:
2946                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2947                         break;
2948                 case OP_FCONV_TO_I8:
2949                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2950                         x86_fnstcw_membase(code, X86_ESP, 0);
2951                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2952                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2953                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2954                         x86_fldcw_membase (code, X86_ESP, 2);
2955                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2956                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2957                         x86_pop_reg (code, ins->dreg);
2958                         x86_pop_reg (code, ins->backend.reg3);
2959                         x86_fldcw_membase (code, X86_ESP, 0);
2960                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2961                         break;
2962                 case OP_LCONV_TO_R_UN: { 
2963                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2964                         guint8 *br;
2965
2966                         /* load 64bit integer to FP stack */
2967                         x86_push_imm (code, 0);
2968                         x86_push_reg (code, ins->sreg2);
2969                         x86_push_reg (code, ins->sreg1);
2970                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2971                         /* store as 80bit FP value */
2972                         x86_fst80_membase (code, X86_ESP, 0);
2973                         
2974                         /* test if lreg is negative */
2975                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2976                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2977         
2978                         /* add correction constant mn */
2979                         x86_fld80_mem (code, mn);
2980                         x86_fld80_membase (code, X86_ESP, 0);
2981                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2982                         x86_fst80_membase (code, X86_ESP, 0);
2983
2984                         x86_patch (br, code);
2985
2986                         x86_fld80_membase (code, X86_ESP, 0);
2987                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2988
2989                         break;
2990                 }
2991                 case OP_LCONV_TO_OVF_I: {
2992                         guint8 *br [3], *label [1];
2993                         MonoInst *tins;
2994
2995                         /* 
2996                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2997                          */
2998                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2999
3000                         /* If the low word top bit is set, see if we are negative */
3001                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3002                         /* We are not negative (no top bit set), check for our top word to be zero */
3003                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3004                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3005                         label [0] = code;
3006
3007                         /* throw exception */
3008                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
3009                         if (tins) {
3010                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
3011                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
3012                                         x86_jump8 (code, 0);
3013                                 else
3014                                         x86_jump32 (code, 0);
3015                         } else {
3016                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3017                                 x86_jump32 (code, 0);
3018                         }
3019         
3020         
3021                         x86_patch (br [0], code);
3022                         /* our top bit is set, check that top word is 0xffffffff */
3023                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3024                 
3025                         x86_patch (br [1], code);
3026                         /* nope, emit exception */
3027                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3028                         x86_patch (br [2], label [0]);
3029
3030                         if (ins->dreg != ins->sreg1)
3031                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3032                         break;
3033                 }
3034                 case OP_FADD:
3035                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3036                         break;
3037                 case OP_FSUB:
3038                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3039                         break;          
3040                 case OP_FMUL:
3041                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3042                         break;          
3043                 case OP_FDIV:
3044                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3045                         break;          
3046                 case OP_FNEG:
3047                         x86_fchs (code);
3048                         break;          
3049                 case OP_SIN:
3050                         x86_fsin (code);
3051                         x86_fldz (code);
3052                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3053                         break;          
3054                 case OP_COS:
3055                         x86_fcos (code);
3056                         x86_fldz (code);
3057                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3058                         break;          
3059                 case OP_ABS:
3060                         x86_fabs (code);
3061                         break;          
3062                 case OP_TAN: {
3063                         /* 
3064                          * it really doesn't make sense to inline all this code,
3065                          * it's here just to show that things may not be as simple 
3066                          * as they appear.
3067                          */
3068                         guchar *check_pos, *end_tan, *pop_jump;
3069                         x86_push_reg (code, X86_EAX);
3070                         x86_fptan (code);
3071                         x86_fnstsw (code);
3072                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3073                         check_pos = code;
3074                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3075                         x86_fstp (code, 0); /* pop the 1.0 */
3076                         end_tan = code;
3077                         x86_jump8 (code, 0);
3078                         x86_fldpi (code);
3079                         x86_fp_op (code, X86_FADD, 0);
3080                         x86_fxch (code, 1);
3081                         x86_fprem1 (code);
3082                         x86_fstsw (code);
3083                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3084                         pop_jump = code;
3085                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3086                         x86_fstp (code, 1);
3087                         x86_fptan (code);
3088                         x86_patch (pop_jump, code);
3089                         x86_fstp (code, 0); /* pop the 1.0 */
3090                         x86_patch (check_pos, code);
3091                         x86_patch (end_tan, code);
3092                         x86_fldz (code);
3093                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3094                         x86_pop_reg (code, X86_EAX);
3095                         break;
3096                 }
3097                 case OP_ATAN:
3098                         x86_fld1 (code);
3099                         x86_fpatan (code);
3100                         x86_fldz (code);
3101                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3102                         break;          
3103                 case OP_SQRT:
3104                         x86_fsqrt (code);
3105                         break;          
3106                 case OP_X86_FPOP:
3107                         x86_fstp (code, 0);
3108                         break;          
3109                 case OP_FREM: {
3110                         guint8 *l1, *l2;
3111
3112                         x86_push_reg (code, X86_EAX);
3113                         /* we need to exchange ST(0) with ST(1) */
3114                         x86_fxch (code, 1);
3115
3116                         /* this requires a loop, because fprem sometimes 
3117                          * returns a partial remainder */
3118                         l1 = code;
3119                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3120                         /* x86_fprem1 (code); */
3121                         x86_fprem (code);
3122                         x86_fnstsw (code);
3123                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3124                         l2 = code + 2;
3125                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3126
3127                         /* pop result */
3128                         x86_fstp (code, 1);
3129
3130                         x86_pop_reg (code, X86_EAX);
3131                         break;
3132                 }
3133                 case OP_FCOMPARE:
3134                         if (cfg->opt & MONO_OPT_FCMOV) {
3135                                 x86_fcomip (code, 1);
3136                                 x86_fstp (code, 0);
3137                                 break;
3138                         }
3139                         /* this overwrites EAX */
3140                         EMIT_FPCOMPARE(code);
3141                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3142                         break;
3143                 case OP_FCEQ:
3144                         if (cfg->opt & MONO_OPT_FCMOV) {
3145                                 /* zeroing the register at the start results in 
3146                                  * shorter and faster code (we can also remove the widening op)
3147                                  */
3148                                 guchar *unordered_check;
3149                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3150                                 x86_fcomip (code, 1);
3151                                 x86_fstp (code, 0);
3152                                 unordered_check = code;
3153                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3154                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3155                                 x86_patch (unordered_check, code);
3156                                 break;
3157                         }
3158                         if (ins->dreg != X86_EAX) 
3159                                 x86_push_reg (code, X86_EAX);
3160
3161                         EMIT_FPCOMPARE(code);
3162                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3163                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3164                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3165                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3166
3167                         if (ins->dreg != X86_EAX) 
3168                                 x86_pop_reg (code, X86_EAX);
3169                         break;
3170                 case OP_FCLT:
3171                 case OP_FCLT_UN:
3172                         if (cfg->opt & MONO_OPT_FCMOV) {
3173                                 /* zeroing the register at the start results in 
3174                                  * shorter and faster code (we can also remove the widening op)
3175                                  */
3176                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3177                                 x86_fcomip (code, 1);
3178                                 x86_fstp (code, 0);
3179                                 if (ins->opcode == OP_FCLT_UN) {
3180                                         guchar *unordered_check = code;
3181                                         guchar *jump_to_end;
3182                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3183                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3184                                         jump_to_end = code;
3185                                         x86_jump8 (code, 0);
3186                                         x86_patch (unordered_check, code);
3187                                         x86_inc_reg (code, ins->dreg);
3188                                         x86_patch (jump_to_end, code);
3189                                 } else {
3190                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3191                                 }
3192                                 break;
3193                         }
3194                         if (ins->dreg != X86_EAX) 
3195                                 x86_push_reg (code, X86_EAX);
3196
3197                         EMIT_FPCOMPARE(code);
3198                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3199                         if (ins->opcode == OP_FCLT_UN) {
3200                                 guchar *is_not_zero_check, *end_jump;
3201                                 is_not_zero_check = code;
3202                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3203                                 end_jump = code;
3204                                 x86_jump8 (code, 0);
3205                                 x86_patch (is_not_zero_check, code);
3206                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3207
3208                                 x86_patch (end_jump, code);
3209                         }
3210                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3211                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3212
3213                         if (ins->dreg != X86_EAX) 
3214                                 x86_pop_reg (code, X86_EAX);
3215                         break;
3216                 case OP_FCGT:
3217                 case OP_FCGT_UN:
3218                         if (cfg->opt & MONO_OPT_FCMOV) {
3219                                 /* zeroing the register at the start results in 
3220                                  * shorter and faster code (we can also remove the widening op)
3221                                  */
3222                                 guchar *unordered_check;
3223                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3224                                 x86_fcomip (code, 1);
3225                                 x86_fstp (code, 0);
3226                                 if (ins->opcode == OP_FCGT) {
3227                                         unordered_check = code;
3228                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3229                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3230                                         x86_patch (unordered_check, code);
3231                                 } else {
3232                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3233                                 }
3234                                 break;
3235                         }
3236                         if (ins->dreg != X86_EAX) 
3237                                 x86_push_reg (code, X86_EAX);
3238
3239                         EMIT_FPCOMPARE(code);
3240                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3241                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3242                         if (ins->opcode == OP_FCGT_UN) {
3243                                 guchar *is_not_zero_check, *end_jump;
3244                                 is_not_zero_check = code;
3245                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3246                                 end_jump = code;
3247                                 x86_jump8 (code, 0);
3248                                 x86_patch (is_not_zero_check, code);
3249                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3250         
3251                                 x86_patch (end_jump, code);
3252                         }
3253                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3254                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3255
3256                         if (ins->dreg != X86_EAX) 
3257                                 x86_pop_reg (code, X86_EAX);
3258                         break;
3259                 case OP_FBEQ:
3260                         if (cfg->opt & MONO_OPT_FCMOV) {
3261                                 guchar *jump = code;
3262                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3263                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3264                                 x86_patch (jump, code);
3265                                 break;
3266                         }
3267                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3268                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3269                         break;
3270                 case OP_FBNE_UN:
3271                         /* Branch if C013 != 100 */
3272                         if (cfg->opt & MONO_OPT_FCMOV) {
3273                                 /* branch if !ZF or (PF|CF) */
3274                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3275                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3276                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3277                                 break;
3278                         }
3279                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3280                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3281                         break;
3282                 case OP_FBLT:
3283                         if (cfg->opt & MONO_OPT_FCMOV) {
3284                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3285                                 break;
3286                         }
3287                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3288                         break;
3289                 case OP_FBLT_UN:
3290                         if (cfg->opt & MONO_OPT_FCMOV) {
3291                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3292                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3293                                 break;
3294                         }
3295                         if (ins->opcode == OP_FBLT_UN) {
3296                                 guchar *is_not_zero_check, *end_jump;
3297                                 is_not_zero_check = code;
3298                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3299                                 end_jump = code;
3300                                 x86_jump8 (code, 0);
3301                                 x86_patch (is_not_zero_check, code);
3302                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3303
3304                                 x86_patch (end_jump, code);
3305                         }
3306                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3307                         break;
3308                 case OP_FBGT:
3309                 case OP_FBGT_UN:
3310                         if (cfg->opt & MONO_OPT_FCMOV) {
3311                                 if (ins->opcode == OP_FBGT) {
3312                                         guchar *br1;
3313
3314                                         /* skip branch if C1=1 */
3315                                         br1 = code;
3316                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3317                                         /* branch if (C0 | C3) = 1 */
3318                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3319                                         x86_patch (br1, code);
3320                                 } else {
3321                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3322                                 }
3323                                 break;
3324                         }
3325                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3326                         if (ins->opcode == OP_FBGT_UN) {
3327                                 guchar *is_not_zero_check, *end_jump;
3328                                 is_not_zero_check = code;
3329                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3330                                 end_jump = code;
3331                                 x86_jump8 (code, 0);
3332                                 x86_patch (is_not_zero_check, code);
3333                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3334
3335                                 x86_patch (end_jump, code);
3336                         }
3337                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3338                         break;
3339                 case OP_FBGE:
3340                         /* Branch if C013 == 100 or 001 */
3341                         if (cfg->opt & MONO_OPT_FCMOV) {
3342                                 guchar *br1;
3343
3344                                 /* skip branch if C1=1 */
3345                                 br1 = code;
3346                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3347                                 /* branch if (C0 | C3) = 1 */
3348                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3349                                 x86_patch (br1, code);
3350                                 break;
3351                         }
3352                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3353                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3354                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3355                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3356                         break;
3357                 case OP_FBGE_UN:
3358                         /* Branch if C013 == 000 */
3359                         if (cfg->opt & MONO_OPT_FCMOV) {
3360                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3361                                 break;
3362                         }
3363                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3364                         break;
3365                 case OP_FBLE:
3366                         /* Branch if C013=000 or 100 */
3367                         if (cfg->opt & MONO_OPT_FCMOV) {
3368                                 guchar *br1;
3369
3370                                 /* skip branch if C1=1 */
3371                                 br1 = code;
3372                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3373                                 /* branch if C0=0 */
3374                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3375                                 x86_patch (br1, code);
3376                                 break;
3377                         }
3378                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3379                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3380                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3381                         break;
3382                 case OP_FBLE_UN:
3383                         /* Branch if C013 != 001 */
3384                         if (cfg->opt & MONO_OPT_FCMOV) {
3385                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3386                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3387                                 break;
3388                         }
3389                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3390                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3391                         break;
3392                 case OP_CKFINITE: {
3393                         x86_push_reg (code, X86_EAX);
3394                         x86_fxam (code);
3395                         x86_fnstsw (code);
3396                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3397                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3398                         x86_pop_reg (code, X86_EAX);
3399                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3400                         break;
3401                 }
3402                 case OP_TLS_GET: {
3403                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3404                         break;
3405                 }
3406                 case OP_MEMORY_BARRIER: {
3407                         /* Not needed on x86 */
3408                         break;
3409                 }
3410                 case OP_ATOMIC_ADD_I4: {
3411                         int dreg = ins->dreg;
3412
3413                         if (dreg == ins->inst_basereg) {
3414                                 x86_push_reg (code, ins->sreg2);
3415                                 dreg = ins->sreg2;
3416                         } 
3417                         
3418                         if (dreg != ins->sreg2)
3419                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3420
3421                         x86_prefix (code, X86_LOCK_PREFIX);
3422                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3423
3424                         if (dreg != ins->dreg) {
3425                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3426                                 x86_pop_reg (code, dreg);
3427                         }
3428
3429                         break;
3430                 }
3431                 case OP_ATOMIC_ADD_NEW_I4: {
3432                         int dreg = ins->dreg;
3433
3434                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3435                         if (ins->sreg2 == dreg) {
3436                                 if (dreg == X86_EBX) {
3437                                         dreg = X86_EDI;
3438                                         if (ins->inst_basereg == X86_EDI)
3439                                                 dreg = X86_ESI;
3440                                 } else {
3441                                         dreg = X86_EBX;
3442                                         if (ins->inst_basereg == X86_EBX)
3443                                                 dreg = X86_EDI;
3444                                 }
3445                         } else if (ins->inst_basereg == dreg) {
3446                                 if (dreg == X86_EBX) {
3447                                         dreg = X86_EDI;
3448                                         if (ins->sreg2 == X86_EDI)
3449                                                 dreg = X86_ESI;
3450                                 } else {
3451                                         dreg = X86_EBX;
3452                                         if (ins->sreg2 == X86_EBX)
3453                                                 dreg = X86_EDI;
3454                                 }
3455                         }
3456
3457                         if (dreg != ins->dreg) {
3458                                 x86_push_reg (code, dreg);
3459                         }
3460
3461                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3462                         x86_prefix (code, X86_LOCK_PREFIX);
3463                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3464                         /* dreg contains the old value, add with sreg2 value */
3465                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3466                         
3467                         if (ins->dreg != dreg) {
3468                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3469                                 x86_pop_reg (code, dreg);
3470                         }
3471
3472                         break;
3473                 }
3474                 case OP_ATOMIC_EXCHANGE_I4: {
3475                         guchar *br[2];
3476                         int sreg2 = ins->sreg2;
3477                         int breg = ins->inst_basereg;
3478
3479                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3480                          * hack to overcome limits in x86 reg allocator 
3481                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3482                          */
3483                         if (ins->dreg != X86_EAX)
3484                                 x86_push_reg (code, X86_EAX);
3485                         
3486                         /* We need the EAX reg for the cmpxchg */
3487                         if (ins->sreg2 == X86_EAX) {
3488                                 x86_push_reg (code, X86_EDX);
3489                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3490                                 sreg2 = X86_EDX;
3491                         }
3492
3493                         if (breg == X86_EAX) {
3494                                 x86_push_reg (code, X86_ESI);
3495                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3496                                 breg = X86_ESI;
3497                         }
3498
3499                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3500
3501                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3502                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3503                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3504                         x86_patch (br [1], br [0]);
3505
3506                         if (breg != ins->inst_basereg)
3507                                 x86_pop_reg (code, X86_ESI);
3508
3509                         if (ins->dreg != X86_EAX) {
3510                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3511                                 x86_pop_reg (code, X86_EAX);
3512                         }
3513
3514                         if (ins->sreg2 != sreg2)
3515                                 x86_pop_reg (code, X86_EDX);
3516
3517                         break;
3518                 }
3519                 default:
3520                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3521                         g_assert_not_reached ();
3522                 }
3523
3524                 if ((code - cfg->native_code - offset) > max_len) {
3525                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3526                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3527                         g_assert_not_reached ();
3528                 }
3529                
3530                 cpos += max_len;
3531
3532                 last_ins = ins;
3533                 last_offset = offset;
3534                 
3535                 ins = ins->next;
3536         }
3537
3538         cfg->code_len = code - cfg->native_code;
3539 }
3540
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Backend hook with an empty body in this file: it takes no arguments,
 * executes no statements and returns nothing.
 */
void
mono_arch_register_lowlevel_calls (void)
{
	/* Intentionally empty */
	return;
}
3545
3546 void
3547 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3548 {
3549         MonoJumpInfo *patch_info;
3550         gboolean compile_aot = !run_cctors;
3551
3552         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3553                 unsigned char *ip = patch_info->ip.i + code;
3554                 const unsigned char *target;
3555
3556                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3557
3558                 if (compile_aot) {
3559                         switch (patch_info->type) {
3560                         case MONO_PATCH_INFO_BB:
3561                         case MONO_PATCH_INFO_LABEL:
3562                                 break;
3563                         default:
3564                                 /* No need to patch these */
3565                                 continue;
3566                         }
3567                 }
3568
3569                 switch (patch_info->type) {
3570                 case MONO_PATCH_INFO_IP:
3571                         *((gconstpointer *)(ip)) = target;
3572                         break;
3573                 case MONO_PATCH_INFO_CLASS_INIT: {
3574                         guint8 *code = ip;
3575                         /* Might already been changed to a nop */
3576                         x86_call_code (code, 0);
3577                         x86_patch (ip, target);
3578                         break;
3579                 }
3580                 case MONO_PATCH_INFO_ABS:
3581                 case MONO_PATCH_INFO_METHOD:
3582                 case MONO_PATCH_INFO_METHOD_JUMP:
3583                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3584                 case MONO_PATCH_INFO_BB:
3585                 case MONO_PATCH_INFO_LABEL:
3586                         x86_patch (ip, target);
3587                         break;
3588                 case MONO_PATCH_INFO_NONE:
3589                         break;
3590                 default: {
3591                         guint32 offset = mono_arch_get_patch_offset (ip);
3592                         *((gconstpointer *)(ip + offset)) = target;
3593                         break;
3594                 }
3595                 }
3596         }
3597 }
3598
3599 guint8 *
3600 mono_arch_emit_prolog (MonoCompile *cfg)
3601 {
3602         MonoMethod *method = cfg->method;
3603         MonoBasicBlock *bb;
3604         MonoMethodSignature *sig;
3605         MonoInst *inst;
3606         int alloc_size, pos, max_offset, i;
3607         guint8 *code;
3608
3609         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3610
3611         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3612                 cfg->code_size += 512;
3613
3614         code = cfg->native_code = g_malloc (cfg->code_size);
3615
3616         x86_push_reg (code, X86_EBP);
3617         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3618
3619         alloc_size = cfg->stack_offset;
3620         pos = 0;
3621
3622         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3623                 /* Might need to attach the thread to the JIT */
3624                 if (lmf_tls_offset != -1) {
3625                         guint8 *buf;
3626
3627                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3628                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3629                         buf = code;
3630                         x86_branch8 (code, X86_CC_NE, 0, 0);
3631                         x86_push_imm (code, cfg->domain);
3632                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3633                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3634                         x86_patch (buf, code);
3635 #ifdef PLATFORM_WIN32
3636                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3637                         /* FIXME: Add a separate key for LMF to avoid this */
3638                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3639 #endif
3640                 } else {
3641                         g_assert (!cfg->compile_aot);
3642                         x86_push_imm (code, cfg->domain);
3643                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3644                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3645                 }
3646         }
3647
3648         if (method->save_lmf) {
3649                 pos += sizeof (MonoLMF);
3650
3651                 /* save the current IP */
3652                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3653                 x86_push_imm_template (code);
3654
3655                 /* save all caller saved regs */
3656                 x86_push_reg (code, X86_EBP);
3657                 x86_push_reg (code, X86_ESI);
3658                 x86_push_reg (code, X86_EDI);
3659                 x86_push_reg (code, X86_EBX);
3660
3661                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3662                         /*
3663                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3664                          * through the mono_lmf_addr TLS variable.
3665                          */
3666                         /* %eax = previous_lmf */
3667                         x86_prefix (code, X86_GS_PREFIX);
3668                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3669                         /* skip esp + method_info + lmf */
3670                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
3671                         /* push previous_lmf */
3672                         x86_push_reg (code, X86_EAX);
3673                         /* new lmf = ESP */
3674                         x86_prefix (code, X86_GS_PREFIX);
3675                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
3676                 } else {
3677                         /* get the address of lmf for the current thread */
3678                         /* 
3679                          * This is performance critical so we try to use some tricks to make
3680                          * it fast.
3681                          */                                                                        
3682
3683                         if (lmf_addr_tls_offset != -1) {
3684                                 /* Load lmf quicky using the GS register */
3685                                 code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
3686 #ifdef PLATFORM_WIN32
3687                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3688                                 /* FIXME: Add a separate key for LMF to avoid this */
3689                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3690 #endif
3691                         } else {
3692                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3693                         }
3694
3695                         /* Skip esp + method info */
3696                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3697
3698                         /* push lmf */
3699                         x86_push_reg (code, X86_EAX); 
3700                         /* push *lfm (previous_lmf) */
3701                         x86_push_membase (code, X86_EAX, 0);
3702                         /* *(lmf) = ESP */
3703                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3704                 }
3705         } else {
3706
3707                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3708                         x86_push_reg (code, X86_EBX);
3709                         pos += 4;
3710                 }
3711
3712                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3713                         x86_push_reg (code, X86_EDI);
3714                         pos += 4;
3715                 }
3716
3717                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3718                         x86_push_reg (code, X86_ESI);
3719                         pos += 4;
3720                 }
3721         }
3722
3723         alloc_size -= pos;
3724
3725 #if __APPLE__
3726         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3727         {
3728                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3729                 if (tot & 4) {
3730                         tot += 4;
3731                         alloc_size += 4;
3732                 }
3733                 if (tot & 8) {
3734                         alloc_size += 8;
3735                 }
3736         }
3737 #endif
3738
3739         if (alloc_size) {
3740                 /* See mono_emit_stack_alloc */
3741 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3742                 guint32 remaining_size = alloc_size;
3743                 while (remaining_size >= 0x1000) {
3744                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3745                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3746                         remaining_size -= 0x1000;
3747                 }
3748                 if (remaining_size)
3749                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3750 #else
3751                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3752 #endif
3753         }
3754
3755 #if __APPLE_
3756         /* check the stack is aligned */
3757         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3758         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3759         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3760         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3761         x86_breakpoint (code);
3762 #endif
3763
3764         /* compute max_offset in order to use short forward jumps */
3765         max_offset = 0;
3766         if (cfg->opt & MONO_OPT_BRANCH) {
3767                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3768                         MonoInst *ins = bb->code;
3769                         bb->max_offset = max_offset;
3770
3771                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3772                                 max_offset += 6;
3773                         /* max alignment for loops */
3774                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3775                                 max_offset += LOOP_ALIGNMENT;
3776
3777                         while (ins) {
3778                                 if (ins->opcode == OP_LABEL)
3779                                         ins->inst_c1 = max_offset;
3780                                 
3781                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3782                                 ins = ins->next;
3783                         }
3784                 }
3785         }
3786
3787         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3788                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3789
3790         /* load arguments allocated to register from the stack */
3791         sig = mono_method_signature (method);
3792         pos = 0;
3793
3794         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3795                 inst = cfg->args [pos];
3796                 if (inst->opcode == OP_REGVAR) {
3797                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3798                         if (cfg->verbose_level > 2)
3799                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3800                 }
3801                 pos++;
3802         }
3803
3804         cfg->code_len = code - cfg->native_code;
3805
3806         return code;
3807 }
3808
3809 void
3810 mono_arch_emit_epilog (MonoCompile *cfg)
3811 {
3812         MonoMethod *method = cfg->method;
3813         MonoMethodSignature *sig = mono_method_signature (method);
3814         int quad, pos;
3815         guint32 stack_to_pop;
3816         guint8 *code;
3817         int max_epilog_size = 16;
3818         CallInfo *cinfo;
3819         
3820         if (cfg->method->save_lmf)
3821                 max_epilog_size += 128;
3822
3823         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3824                 cfg->code_size *= 2;
3825                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3826                 mono_jit_stats.code_reallocs++;
3827         }
3828
3829         code = cfg->native_code + cfg->code_len;
3830
3831         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3832                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3833
3834         /* the code restoring the registers must be kept in sync with OP_JMP */
3835         pos = 0;
3836         
3837         if (method->save_lmf) {
3838                 gint32 prev_lmf_reg;
3839                 gint32 lmf_offset = -sizeof (MonoLMF);
3840
3841                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3842                         /*
3843                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3844                          * through the mono_lmf_addr TLS variable.
3845                          */
3846                         /* reg = previous_lmf */
3847                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3848
3849                         /* lmf = previous_lmf */
3850                         x86_prefix (code, X86_GS_PREFIX);
3851                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
3852                 } else {
3853                         /* Find a spare register */
3854                         switch (sig->ret->type) {
3855                         case MONO_TYPE_I8:
3856                         case MONO_TYPE_U8:
3857                                 prev_lmf_reg = X86_EDI;
3858                                 cfg->used_int_regs |= (1 << X86_EDI);
3859                                 break;
3860                         default:
3861                                 prev_lmf_reg = X86_EDX;
3862                                 break;
3863                         }
3864
3865                         /* reg = previous_lmf */
3866                         x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3867
3868                         /* ecx = lmf */
3869                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
3870
3871                         /* *(lmf) = previous_lmf */
3872                         x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3873                 }
3874
3875                 /* restore caller saved regs */
3876                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3877                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
3878                 }
3879
3880                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3881                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
3882                 }
3883                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3884                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
3885                 }
3886
3887                 /* EBP is restored by LEAVE */
3888         } else {
3889                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3890                         pos -= 4;
3891                 }
3892                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3893                         pos -= 4;
3894                 }
3895                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3896                         pos -= 4;
3897                 }
3898
3899                 if (pos)
3900                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3901
3902                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3903                         x86_pop_reg (code, X86_ESI);
3904                 }
3905                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3906                         x86_pop_reg (code, X86_EDI);
3907                 }
3908                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3909                         x86_pop_reg (code, X86_EBX);
3910                 }
3911         }
3912
3913         /* Load returned vtypes into registers if needed */
3914         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
3915         if (cinfo->ret.storage == ArgValuetypeInReg) {
3916                 for (quad = 0; quad < 2; quad ++) {
3917                         switch (cinfo->ret.pair_storage [quad]) {
3918                         case ArgInIReg:
3919                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
3920                                 break;
3921                         case ArgOnFloatFpStack:
3922                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
3923                                 break;
3924                         case ArgOnDoubleFpStack:
3925                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
3926                                 break;
3927                         case ArgNone:
3928                                 break;
3929                         default:
3930                                 g_assert_not_reached ();
3931                         }
3932                 }
3933         }
3934
3935         x86_leave (code);
3936
3937         if (CALLCONV_IS_STDCALL (sig)) {
3938                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3939
3940                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3941         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
3942                 stack_to_pop = 4;
3943         else
3944                 stack_to_pop = 0;
3945
3946         if (stack_to_pop)
3947                 x86_ret_imm (code, stack_to_pop);
3948         else
3949                 x86_ret (code);
3950
3951         cfg->code_len = code - cfg->native_code;
3952
3953         g_assert (cfg->code_len < cfg->code_size);
3954 }
3955
3956 void
3957 mono_arch_emit_exceptions (MonoCompile *cfg)
3958 {
3959         MonoJumpInfo *patch_info;
3960         int nthrows, i;
3961         guint8 *code;
3962         MonoClass *exc_classes [16];
3963         guint8 *exc_throw_start [16], *exc_throw_end [16];
3964         guint32 code_size;
3965         int exc_count = 0;
3966
3967         /* Compute needed space */
3968         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3969                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3970                         exc_count++;
3971         }
3972
3973         /* 
3974          * make sure we have enough space for exceptions
3975          * 16 is the size of two push_imm instructions and a call
3976          */
3977         if (cfg->compile_aot)
3978                 code_size = exc_count * 32;
3979         else
3980                 code_size = exc_count * 16;
3981
3982         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
3983                 cfg->code_size *= 2;
3984                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3985                 mono_jit_stats.code_reallocs++;
3986         }
3987
3988         code = cfg->native_code + cfg->code_len;
3989
3990         nthrows = 0;
3991         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3992                 switch (patch_info->type) {
3993                 case MONO_PATCH_INFO_EXC: {
3994                         MonoClass *exc_class;
3995                         guint8 *buf, *buf2;
3996                         guint32 throw_ip;
3997
3998                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3999
4000                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4001                         g_assert (exc_class);
4002                         throw_ip = patch_info->ip.i;
4003
4004                         /* Find a throw sequence for the same exception class */
4005                         for (i = 0; i < nthrows; ++i)
4006                                 if (exc_classes [i] == exc_class)
4007                                         break;
4008                         if (i < nthrows) {
4009                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4010                                 x86_jump_code (code, exc_throw_start [i]);
4011                                 patch_info->type = MONO_PATCH_INFO_NONE;
4012                         }
4013                         else {
4014                                 guint32 size;
4015
4016                                 /* Compute size of code following the push <OFFSET> */
4017                                 size = 5 + 5;
4018
4019                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
4020                                         /* Use the shorter form */
4021                                         buf = buf2 = code;
4022                                         x86_push_imm (code, 0);
4023                                 }
4024                                 else {
4025                                         buf = code;
4026                                         x86_push_imm (code, 0xf0f0f0f0);
4027                                         buf2 = code;
4028                                 }
4029
4030                                 if (nthrows < 16) {
4031                                         exc_classes [nthrows] = exc_class;
4032                                         exc_throw_start [nthrows] = code;
4033                                 }
4034
4035                                 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
4036                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
4037                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4038                                 patch_info->ip.i = code - cfg->native_code;
4039                                 x86_call_code (code, 0);
4040                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
4041                                 while (buf < buf2)
4042                                         x86_nop (buf);
4043
4044                                 if (nthrows < 16) {
4045                                         exc_throw_end [nthrows] = code;
4046                                         nthrows ++;
4047                                 }
4048                         }
4049                         break;
4050                 }
4051                 default:
4052                         /* do nothing */
4053                         break;
4054                 }
4055         }
4056
4057         cfg->code_len = code - cfg->native_code;
4058
4059         g_assert (cfg->code_len < cfg->code_size);
4060 }
4061
4062 void
4063 mono_arch_flush_icache (guint8 *code, gint size)
4064 {
4065         /* not needed */
4066 }
4067
/*
 * mono_arch_flush_register_windows:
 *
 *   Empty in this backend.
 */
void
mono_arch_flush_register_windows (void)
{
	/* intentionally left empty */
}
4072
4073 /*
4074  * Support for fast access to the thread-local lmf structure using the GS
4075  * segment register on NPTL + kernel 2.6.x.
4076  */
4077
4078 static gboolean tls_offset_inited = FALSE;
4079
4080 void
4081 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4082 {
4083         if (!tls_offset_inited) {
4084                 if (!getenv ("MONO_NO_TLS")) {
4085 #ifdef PLATFORM_WIN32
4086                         /* 
4087                          * We need to init this multiple times, since when we are first called, the key might not
4088                          * be initialized yet.
4089                          */
4090                         appdomain_tls_offset = mono_domain_get_tls_key ();
4091                         lmf_tls_offset = mono_get_jit_tls_key ();
4092                         thread_tls_offset = mono_thread_get_tls_key ();
4093
4094                         /* Only 64 tls entries can be accessed using inline code */
4095                         if (appdomain_tls_offset >= 64)
4096                                 appdomain_tls_offset = -1;
4097                         if (lmf_tls_offset >= 64)
4098                                 lmf_tls_offset = -1;
4099                         if (thread_tls_offset >= 64)
4100                                 thread_tls_offset = -1;
4101 #else
4102 #if MONO_XEN_OPT
4103                         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
4104 #endif
4105                         tls_offset_inited = TRUE;
4106                         appdomain_tls_offset = mono_domain_get_tls_offset ();
4107                         lmf_tls_offset = mono_get_lmf_tls_offset ();
4108                         lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
4109                         thread_tls_offset = mono_thread_get_tls_offset ();
4110 #endif
4111                 }
4112         }               
4113 }
4114
4115 void
4116 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4117 {
4118 }
4119
4120 void
4121 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4122 {
4123         MonoCallInst *call = (MonoCallInst*)inst;
4124         CallInfo *cinfo = get_call_info (cfg, cfg->mempool, inst->signature, FALSE);
4125
4126         /* add the this argument */
4127         if (this_reg != -1) {
4128                 if (cinfo->args [0].storage == ArgInIReg) {
4129                         MonoInst *this;
4130                         MONO_INST_NEW (cfg, this, OP_MOVE);
4131                         this->type = this_type;
4132                         this->sreg1 = this_reg;
4133                         this->dreg = mono_regstate_next_int (cfg->rs);
4134                         mono_bblock_add_inst (cfg->cbb, this);
4135
4136                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
4137                 }
4138                 else {
4139                         MonoInst *this;
4140                         MONO_INST_NEW (cfg, this, OP_OUTARG);
4141                         this->type = this_type;
4142                         this->sreg1 = this_reg;
4143                         mono_bblock_add_inst (cfg->cbb, this);
4144                 }
4145         }
4146
4147         if (vt_reg != -1) {
4148                 MonoInst *vtarg;
4149
4150                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4151                         /*
4152                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4153                          * the stack. Save the address here, so the call instruction can
4154                          * access it.
4155                          */
4156                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4157                         vtarg->inst_destbasereg = X86_ESP;
4158                         vtarg->inst_offset = inst->stack_usage;
4159                         vtarg->sreg1 = vt_reg;
4160                         mono_bblock_add_inst (cfg->cbb, vtarg);
4161                 }
4162                 else if (cinfo->ret.storage == ArgInIReg) {
4163                         /* The return address is passed in a register */
4164                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
4165                         vtarg->sreg1 = vt_reg;
4166                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
4167                         mono_bblock_add_inst (cfg->cbb, vtarg);
4168
4169                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
4170                 } else {
4171                         MonoInst *vtarg;
4172                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4173                         vtarg->type = STACK_MP;
4174                         vtarg->sreg1 = vt_reg;
4175                         mono_bblock_add_inst (cfg->cbb, vtarg);
4176                 }
4177         }
4178 }
4179
4180 #ifdef MONO_ARCH_HAVE_IMT
4181
/*
 * Sizes in bytes of the instructions used by the IMT thunks.
 *
 * Linear handler, the bsearch head compare is shorter
 * [2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
 * [1 + 1] x86_branch8(inst,cond,imm,is_signed)
 * [2 + 4] x86_branch32(inst,cond,imm,is_signed)
 *         x86_patch(ins,target)
 * [1 + 5] x86_jump_mem(inst,mem)
 *
 * BR_LARGE_SIZE has to cover a conditional branch with a 32 bit displacement,
 * which is encoded with a two byte opcode (0x0f 0x8x) followed by the
 * displacement, i.e. 6 bytes. A smaller value makes the thunk size estimate
 * too small whenever the long branch form is emitted.
 */
#define CMP_SIZE 6
#define BR_SMALL_SIZE 2
#define BR_LARGE_SIZE 6
#define JUMP_IMM_SIZE 6
#define ENABLE_WRONG_METHOD_CHECK 0
4193
4194 static int
4195 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
4196 {
4197         int i, distance = 0;
4198         for (i = start; i < target; ++i)
4199                 distance += imt_entries [i]->chunk_size;
4200         return distance;
4201 }
4202
4203 /*
4204  * LOCKING: called with the domain lock held
4205  */
4206 gpointer
4207 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
4208 {
4209         int i;
4210         int size = 0;
4211         guint8 *code, *start;
4212
4213         for (i = 0; i < count; ++i) {
4214                 MonoIMTCheckItem *item = imt_entries [i];
4215                 if (item->is_equals) {
4216                         if (item->check_target_idx) {
4217                                 if (!item->compare_done)
4218                                         item->chunk_size += CMP_SIZE;
4219                                 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
4220                         } else {
4221                                 item->chunk_size += JUMP_IMM_SIZE;
4222 #if ENABLE_WRONG_METHOD_CHECK
4223                                 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
4224 #endif
4225                         }
4226                 } else {
4227                         item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
4228                         imt_entries [item->check_target_idx]->compare_done = TRUE;
4229                 }
4230                 size += item->chunk_size;
4231         }
4232         code = mono_code_manager_reserve (domain->code_mp, size);
4233         start = code;
4234         for (i = 0; i < count; ++i) {
4235                 MonoIMTCheckItem *item = imt_entries [i];
4236                 item->code_target = code;
4237                 if (item->is_equals) {
4238                         if (item->check_target_idx) {
4239                                 if (!item->compare_done)
4240                                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4241                                 item->jmp_code = code;
4242                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);
4243                                 x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
4244                         } else {
4245                                 /* enable the commented code to assert on wrong method */
4246 #if ENABLE_WRONG_METHOD_CHECK
4247                                 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4248                                 item->jmp_code = code;
4249                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);
4250 #endif
4251                                 x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
4252 #if ENABLE_WRONG_METHOD_CHECK
4253                                 x86_patch (item->jmp_code, code);
4254                                 x86_breakpoint (code);
4255                                 item->jmp_code = NULL;
4256 #endif
4257                         }
4258                 } else {
4259                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4260                         item->jmp_code = code;
4261                         if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
4262                                 x86_branch8 (code, X86_CC_GE, 0, FALSE);
4263                         else
4264                                 x86_branch32 (code, X86_CC_GE, 0, FALSE);
4265                 }
4266         }
4267         /* patch the branches to get to the target items */
4268         for (i = 0; i < count; ++i) {
4269                 MonoIMTCheckItem *item = imt_entries [i];
4270                 if (item->jmp_code) {
4271                         if (item->check_target_idx) {
4272                                 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
4273                         }
4274                 }
4275         }
4276                 
4277         mono_stats.imt_thunks_size += code - start;
4278         g_assert (code - start <= size);
4279         return start;
4280 }
4281
4282 MonoMethod*
4283 mono_arch_find_imt_method (gpointer *regs, guint8 *code)
4284 {
4285         return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
4286 }
4287
4288 MonoObject*
4289 mono_arch_find_this_argument (gpointer *regs, MonoMethod *method)
4290 {
4291         MonoMethodSignature *sig = mono_method_signature (method);
4292         CallInfo *cinfo = get_call_info (NULL, NULL, sig, FALSE);
4293         int this_argument_offset;
4294         MonoObject *this_argument;
4295
4296         /* 
4297          * this is the offset of the this arg from esp as saved at the start of 
4298          * mono_arch_create_trampoline_code () in tramp-x86.c.
4299          */
4300         this_argument_offset = 5;
4301         if (MONO_TYPE_ISSTRUCT (sig->ret) && (cinfo->ret.storage == ArgOnStack))
4302                 this_argument_offset++;
4303
4304         this_argument = * (MonoObject**) (((guint8*) regs [X86_ESP]) + this_argument_offset * sizeof (gpointer));
4305
4306         g_free (cinfo);
4307         return this_argument;
4308 }
4309 #endif
4310
4311 MonoInst*
4312 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4313 {
4314         MonoInst *ins = NULL;
4315
4316         if (cmethod->klass == mono_defaults.math_class) {
4317                 if (strcmp (cmethod->name, "Sin") == 0) {
4318                         MONO_INST_NEW (cfg, ins, OP_SIN);
4319                         ins->inst_i0 = args [0];
4320                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4321                         MONO_INST_NEW (cfg, ins, OP_COS);
4322                         ins->inst_i0 = args [0];
4323                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4324                         MONO_INST_NEW (cfg, ins, OP_TAN);
4325                         ins->inst_i0 = args [0];
4326                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4327                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4328                         ins->inst_i0 = args [0];
4329                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4330                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4331                         ins->inst_i0 = args [0];
4332                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4333                         MONO_INST_NEW (cfg, ins, OP_ABS);
4334                         ins->inst_i0 = args [0];
4335                 }
4336 #if 0
4337                 /* OP_FREM is not IEEE compatible */
4338                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4339                         MONO_INST_NEW (cfg, ins, OP_FREM);
4340                         ins->inst_i0 = args [0];
4341                         ins->inst_i1 = args [1];
4342                 }
4343 #endif
4344         } else if (cmethod->klass == mono_defaults.thread_class &&
4345                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
4346                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
4347         } else if(cmethod->klass->image == mono_defaults.corlib &&
4348                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4349                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4350
4351                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4352                         MonoInst *ins_iconst;
4353
4354                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4355                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4356                         ins_iconst->inst_c0 = 1;
4357
4358                         ins->inst_i0 = args [0];
4359                         ins->inst_i1 = ins_iconst;
4360                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4361                         MonoInst *ins_iconst;
4362
4363                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4364                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4365                         ins_iconst->inst_c0 = -1;
4366
4367                         ins->inst_i0 = args [0];
4368                         ins->inst_i1 = ins_iconst;
4369                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4370                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4371
4372                         ins->inst_i0 = args [0];
4373                         ins->inst_i1 = args [1];
4374                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4375                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4376
4377                         ins->inst_i0 = args [0];
4378                         ins->inst_i1 = args [1];
4379                 }
4380         }
4381
4382         return ins;
4383 }
4384
4385
4386 gboolean
4387 mono_arch_print_tree (MonoInst *tree, int arity)
4388 {
4389         return 0;
4390 }
4391
4392 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4393 {
4394         MonoInst* ins;
4395         
4396         if (appdomain_tls_offset == -1)
4397                 return NULL;
4398
4399         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4400         ins->inst_offset = appdomain_tls_offset;
4401         return ins;
4402 }
4403
4404 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4405 {
4406         MonoInst* ins;
4407
4408         if (thread_tls_offset == -1)
4409                 return NULL;
4410
4411         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4412         ins->inst_offset = thread_tls_offset;
4413         return ins;
4414 }
4415
4416 guint32
4417 mono_arch_get_patch_offset (guint8 *code)
4418 {
4419         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4420                 return 2;
4421         else if ((code [0] == 0xba))
4422                 return 1;
4423         else if ((code [0] == 0x68))
4424                 /* push IMM */
4425                 return 1;
4426         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4427                 /* push <OFFSET>(<REG>) */
4428                 return 2;
4429         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4430                 /* call *<OFFSET>(<REG>) */
4431                 return 2;
4432         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4433                 /* fldl <ADDR> */
4434                 return 2;
4435         else if ((code [0] == 0x58) && (code [1] == 0x05))
4436                 /* pop %eax; add <OFFSET>, %eax */
4437                 return 2;
4438         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4439                 /* pop <REG>; add <OFFSET>, <REG> */
4440                 return 3;
4441         else {
4442                 g_assert_not_reached ();
4443                 return -1;
4444         }
4445 }
4446
4447 gpointer*
4448 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
4449 {
4450         guint8 reg = 0;
4451         gint32 disp = 0;
4452
4453         /* go to the start of the call instruction
4454          *
4455          * address_byte = (m << 6) | (o << 3) | reg
4456          * call opcode: 0xff address_byte displacement
4457          * 0xff m=1,o=2 imm8
4458          * 0xff m=2,o=2 imm32
4459          */
4460         code -= 6;
4461
4462         /* 
4463          * A given byte sequence can match more than case here, so we have to be
4464          * really careful about the ordering of the cases. Longer sequences
4465          * come first.
4466          */
4467         if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
4468                 /*
4469                  * This is an interface call
4470                  * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
4471                  * ff 10                   call   *(%eax)
4472                  */
4473                 reg = x86_modrm_rm (code [5]);
4474                 disp = 0;
4475 #ifdef MONO_ARCH_HAVE_IMT
4476         } else if ((code [-2] == 0xba) && (code [3] == 0xff) && (x86_modrm_mod (code [4]) == 1) && (x86_modrm_reg (code [4]) == 2) && ((signed char)code [5] < 0)) {
4477                 /* IMT-based interface calls: with MONO_ARCH_IMT_REG == edx
4478                  * ba 14 f8 28 08          mov    $0x828f814,%edx
4479                  * ff 50 fc                call   *0xfffffffc(%eax)
4480                  */
4481                 reg = code [4] & 0x07;
4482                 disp = (signed char)code [5];
4483 #endif
4484         } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
4485                 reg = code [4] & 0x07;
4486                 disp = (signed char)code [5];
4487         } else {
4488                 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
4489                         reg = code [1] & 0x07;
4490                         disp = *((gint32*)(code + 2));
4491                 } else if ((code [1] == 0xe8)) {
4492                         return NULL;
4493                 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
4494                         /*
4495                          * This is a interface call
4496                          * 8b 40 30   mov    0x30(%eax),%eax
4497                          * ff 10      call   *(%eax)
4498                          */
4499                         disp = 0;
4500                         reg = code [5] & 0x07;
4501                 }
4502                 else
4503                         return NULL;
4504         }
4505
4506         return (gpointer*)(((gint32)(regs [reg])) + disp);
4507 }
4508
4509 gpointer
4510 mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code)
4511 {
4512         guint32 esp = regs [X86_ESP];
4513         CallInfo *cinfo;
4514         gpointer res;
4515
4516         cinfo = get_call_info (NULL, NULL, sig, FALSE);
4517
4518         /*
4519          * The stack looks like:
4520          * <other args>
4521          * <this=delegate>
4522          * <possible vtype return address>
4523          * <return addr>
4524          * <4 pointers pushed by mono_arch_create_trampoline_code ()>
4525          */
4526         res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]);
4527         g_free (cinfo);
4528         return res;
4529 }
4530
4531 #define MAX_ARCH_DELEGATE_PARAMS 10
4532
4533 gpointer
4534 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
4535 {
4536         guint8 *code, *start;
4537
4538         if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
4539                 return NULL;
4540
4541         /* FIXME: Support more cases */
4542         if (MONO_TYPE_ISSTRUCT (sig->ret))
4543                 return NULL;
4544
4545         /*
4546          * The stack contains:
4547          * <delegate>
4548          * <return addr>
4549          */
4550
4551         if (has_target) {
4552                 static guint8* cached = NULL;
4553                 mono_mini_arch_lock ();
4554                 if (cached) {
4555                         mono_mini_arch_unlock ();
4556                         return cached;
4557                 }
4558                 
4559                 start = code = mono_global_codeman_reserve (64);
4560
4561                 /* Replace the this argument with the target */
4562                 x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
4563                 x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
4564                 x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
4565                 x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4566
4567                 g_assert ((code - start) < 64);
4568
4569                 cached = start;
4570
4571                 mono_mini_arch_unlock ();
4572         } else {
4573                 static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
4574                 int i = 0;
4575                 /* 8 for mov_reg and jump, plus 8 for each parameter */
4576                 int code_reserve = 8 + (sig->param_count * 8);
4577
4578                 for (i = 0; i < sig->param_count; ++i)
4579                         if (!mono_is_regsize_var (sig->params [i]))
4580                                 return NULL;
4581
4582                 mono_mini_arch_lock ();
4583                 code = cache [sig->param_count];
4584                 if (code) {
4585                         mono_mini_arch_unlock ();
4586                         return code;
4587                 }
4588
4589                 /*
4590                  * The stack contains:
4591                  * <args in reverse order>
4592                  * <delegate>
4593                  * <return addr>
4594                  *
4595                  * and we need:
4596                  * <args in reverse order>
4597                  * <return addr>
4598                  * 
4599                  * without unbalancing the stack.
4600                  * So move each arg up a spot in the stack (overwriting un-needed 'this' arg)
4601                  * and leaving original spot of first arg as placeholder in stack so
4602                  * when callee pops stack everything works.
4603                  */
4604
4605                 start = code = mono_global_codeman_reserve (code_reserve);
4606
4607                 /* store delegate for access to method_ptr */
4608                 x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);
4609
4610                 /* move args up */
4611                 for (i = 0; i < sig->param_count; ++i) {
4612                         x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
4613                         x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
4614                 }
4615
4616                 x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4617
4618                 g_assert ((code - start) < code_reserve);
4619
4620                 cache [sig->param_count] = start;
4621
4622                 mono_mini_arch_unlock ();
4623         }
4624
4625         return start;
4626 }