0767fcc83b6f3be97c4fd0681b1f514d81f88967
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/debug-helpers.h>
20 #include <mono/metadata/threads.h>
21 #include <mono/metadata/profiler-private.h>
22 #include <mono/metadata/mono-debug.h>
23 #include <mono/utils/mono-math.h>
24
25 #include "trace.h"
26 #include "mini-x86.h"
27 #include "inssel.h"
28 #include "cpu-x86.h"
29
30 /* On windows, these hold the key returned by TlsAlloc () */
/* All four start at -1. NOTE(review): the code that assigns them is not in this section of the file. */
31 static gint lmf_tls_offset = -1;
32 static gint lmf_addr_tls_offset = -1;
33 static gint appdomain_tls_offset = -1;
34 static gint thread_tls_offset = -1;
35
/*
 * optimize_for_xen is a run-time flag (initially TRUE) when MONO_XEN_OPT is
 * defined, and the compile-time constant 0 otherwise.
 */
36 #ifdef MONO_XEN_OPT
37 static gboolean optimize_for_xen = TRUE;
38 #else
39 #define optimize_for_xen 0
40 #endif
41
/* is_win32 mirrors the PLATFORM_WIN32 build setting as a run-time testable value. */
42 #ifdef PLATFORM_WIN32
43 static gboolean is_win32 = TRUE;
44 #else
45 static gboolean is_win32 = FALSE;
46 #endif
47
48 /* This mutex protects architecture specific caches */
/* It is initialized in mono_arch_init () and deleted in mono_arch_cleanup (). */
49 #define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
50 #define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
51 static CRITICAL_SECTION mini_arch_mutex;
52
/*
 * ALIGN_TO rounds VAL up to the next multiple of ALIGN and yields a guint64.
 * The mask arithmetic only works when ALIGN is a power of two.
 * NOTE(review): VAL is not parenthesized inside the cast, so pass a simple
 * expression (a variable or a constant).
 */
53 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
54
/*
 * Displacement added to ArgInfo.offset to address incoming arguments
 * relative to EBP (see mono_arch_allocate_vars).
 * NOTE(review): presumably the saved EBP plus the return address - confirm.
 */
55 #define ARGS_OFFSET 8
56
57 #ifdef PLATFORM_WIN32
58 /* Under windows, the default pinvoke calling convention is stdcall */
59 #define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
60 #else
/* Elsewhere only an explicit MONO_CALL_STDCALL signature counts as stdcall. */
61 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
62 #endif
63
/* Marks code paths that are not implemented: expands to g_assert_not_reached (). */
64 #define NOT_IMPLEMENTED g_assert_not_reached ()
65
/* Table of MONO_BREAKPOINT_ARRAY_SIZE breakpoint records; it has external linkage. */
66 MonoBreakpointInfo
67 mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];
68
69 const char*
70 mono_arch_regname (int reg)
71 {
72         switch (reg) {
73         case X86_EAX: return "%eax";
74         case X86_EBX: return "%ebx";
75         case X86_ECX: return "%ecx";
76         case X86_EDX: return "%edx";
77         case X86_ESP: return "%esp";    
78         case X86_EBP: return "%ebp";
79         case X86_EDI: return "%edi";
80         case X86_ESI: return "%esi";
81         }
82         return "unknown";
83 }
84
/*
 * mono_arch_fregname:
 * @reg: a floating point register index
 *
 * Returns the printable name ("%fr0" .. "%fr7") of the floating point
 * register REG, or "unknown" when REG is outside the range 0..7.
 */
const char*
mono_arch_fregname (int reg)
{
	static const char * const fp_reg_names [] = {
		"%fr0", "%fr1", "%fr2", "%fr3",
		"%fr4", "%fr5", "%fr6", "%fr7"
	};
	const int name_count = (int)(sizeof (fp_reg_names) / sizeof (fp_reg_names [0]));

	if (reg < 0 || reg >= name_count)
		return "unknown";

	return fp_reg_names [reg];
}
109
/*
 * ArgStorage: where get_call_info () decided an argument or a return value
 * lives.
 */
110 typedef enum {
        /* In an integer register, named by ArgInfo.reg */
111         ArgInIReg,
        /* R4 / R8 in a float register; only chosen by add_float () when FLOAT_PARAM_REGS > 0 */
112         ArgInFloatSSEReg,
113         ArgInDoubleSSEReg,
        /* On the stack, at ArgInfo.offset */
114         ArgOnStack,
        /* Small struct return value, described by ArgInfo.pair_storage / pair_regs */
115         ArgValuetypeInReg,
        /* R4 / R8 return value. NOTE(review): presumably the x87 register stack - confirm */
116         ArgOnFloatFpStack,
117         ArgOnDoubleFpStack,
        /* No value: void return, or an unused slot of pair_storage */
118         ArgNone
119 } ArgStorage;
120
/*
 * ArgInfo: location of one argument (or of the return value) as computed by
 * get_call_info ().
 */
121 typedef struct {
        /*
         * Running stack size at the moment the value was assigned a slot.
         * NOTE(review): this is 16 bits wide while the running size is a
         * guint32 - confirm very large argument areas cannot overflow it.
         */
122         gint16 offset;
        /* Register number, meaningful for the register storage kinds */
123         gint8  reg;
124         ArgStorage storage;
125
126         /* Only if storage == ArgValuetypeInReg */
        /* One entry per half of the struct; ArgNone marks an unused half */
127         ArgStorage pair_storage [2];
128         gint8 pair_regs [2];
129 } ArgInfo;
130
/*
 * CallInfo: result of get_call_info () for one signature.
 * The structure is allocated with room for one ArgInfo per argument
 * (including `this`) after the fixed part, so args [] is indexed 0..n-1.
 */
131 typedef struct {
        /* NOTE(review): not assigned by get_call_info () in this part of the file */
132         int nargs;
        /* Total number of bytes of stack used by the arguments */
133         guint32 stack_usage;
        /* Number of integer / float argument registers consumed */
134         guint32 reg_usage;
135         guint32 freg_usage;
        /* Only set under __APPLE__, when padding to a multiple of 16 bytes was added */
136         gboolean need_stack_align;
137         guint32 stack_align_amount;
        /* Location of the return value */
138         ArgInfo ret;
        /* Location of the vararg signature cookie, when one is emitted */
139         ArgInfo sig_cookie;
        /* First element of the variable length argument array */
140         ArgInfo args [1];
141 } CallInfo;
142
/* Number of integer registers used for argument passing: none, so every argument goes on the stack. */
143 #define PARAM_REGS 0
144
/* Number of float registers used for argument passing: none. */
145 #define FLOAT_PARAM_REGS 0
146
/* Indexed by add_general () only while fewer than PARAM_REGS registers are in use; the single 0 entry is a placeholder. */
147 static X86_Reg_No param_regs [] = { 0 };
148
/*
 * On these platforms add_valuetype () returns structs of native size
 * 1, 2, 4 or 8 from pinvoke methods in return_regs instead of memory.
 */
149 #if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
150 #define SMALL_STRUCTS_IN_REGS
151 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
152 #endif
153
154 static void inline
155 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
156 {
157     ainfo->offset = *stack_size;
158
159     if (*gr >= PARAM_REGS) {
160                 ainfo->storage = ArgOnStack;
161                 (*stack_size) += sizeof (gpointer);
162     }
163     else {
164                 ainfo->storage = ArgInIReg;
165                 ainfo->reg = param_regs [*gr];
166                 (*gr) ++;
167     }
168 }
169
170 static void inline
171 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
172 {
173         ainfo->offset = *stack_size;
174
175         g_assert (PARAM_REGS == 0);
176         
177         ainfo->storage = ArgOnStack;
178         (*stack_size) += sizeof (gpointer) * 2;
179 }
180
181 static void inline
182 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
183 {
184     ainfo->offset = *stack_size;
185
186     if (*gr >= FLOAT_PARAM_REGS) {
187                 ainfo->storage = ArgOnStack;
188                 (*stack_size) += is_double ? 8 : 4;
189     }
190     else {
191                 /* A double register */
192                 if (is_double)
193                         ainfo->storage = ArgInDoubleSSEReg;
194                 else
195                         ainfo->storage = ArgInFloatSSEReg;
196                 ainfo->reg = *gr;
197                 (*gr) += 1;
198     }
199 }
200
201
202 static void
203 add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
204                gboolean is_return,
205                guint32 *gr, guint32 *fr, guint32 *stack_size)
206 {
207         guint32 size;
208         MonoClass *klass;
209
210         klass = mono_class_from_mono_type (type);
211         if (sig->pinvoke) 
212                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
213         else 
214                 size = mini_type_stack_size (gsctx, &klass->byval_arg, NULL);
215
216 #ifdef SMALL_STRUCTS_IN_REGS
217         if (sig->pinvoke && is_return) {
218                 MonoMarshalType *info;
219
220                 /*
221                  * the exact rules are not very well documented, the code below seems to work with the 
222                  * code generated by gcc 3.3.3 -mno-cygwin.
223                  */
224                 info = mono_marshal_load_type_info (klass);
225                 g_assert (info);
226
227                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
228
229                 /* Special case structs with only a float member */
230                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
231                         ainfo->storage = ArgValuetypeInReg;
232                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
233                         return;
234                 }
235                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
236                         ainfo->storage = ArgValuetypeInReg;
237                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
238                         return;
239                 }               
240                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
241                         ainfo->storage = ArgValuetypeInReg;
242                         ainfo->pair_storage [0] = ArgInIReg;
243                         ainfo->pair_regs [0] = return_regs [0];
244                         if (info->native_size > 4) {
245                                 ainfo->pair_storage [1] = ArgInIReg;
246                                 ainfo->pair_regs [1] = return_regs [1];
247                         }
248                         return;
249                 }
250         }
251 #endif
252
253         ainfo->offset = *stack_size;
254         ainfo->storage = ArgOnStack;
255         *stack_size += ALIGN_TO (size, sizeof (gpointer));
256 }
257
258 /*
259  * get_call_info:
260  *
261  *  Obtain information about a call according to the calling convention.
262  * For x86 ELF, see the "System V Application Binary Interface Intel386 
263  * Architecture Processor Supplment, Fourth Edition" document for more
264  * information.
265  * For x86 win32, see ???.
266  */
267 static CallInfo*
268 get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
269 {
270         guint32 i, gr, fr;
271         MonoType *ret_type;
272         int n = sig->hasthis + sig->param_count;
273         guint32 stack_size = 0;
274         CallInfo *cinfo;
275         MonoGenericSharingContext *gsctx = cfg ? cfg->generic_sharing_context : NULL;
276
277         if (mp)
278                 cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
279         else
280                 cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
281
282         gr = 0;
283         fr = 0;
284
285         /* return value */
286         {
287                 ret_type = mono_type_get_underlying_type (sig->ret);
288                 ret_type = mini_get_basic_type_from_generic (gsctx, ret_type);
289                 switch (ret_type->type) {
290                 case MONO_TYPE_BOOLEAN:
291                 case MONO_TYPE_I1:
292                 case MONO_TYPE_U1:
293                 case MONO_TYPE_I2:
294                 case MONO_TYPE_U2:
295                 case MONO_TYPE_CHAR:
296                 case MONO_TYPE_I4:
297                 case MONO_TYPE_U4:
298                 case MONO_TYPE_I:
299                 case MONO_TYPE_U:
300                 case MONO_TYPE_PTR:
301                 case MONO_TYPE_FNPTR:
302                 case MONO_TYPE_CLASS:
303                 case MONO_TYPE_OBJECT:
304                 case MONO_TYPE_SZARRAY:
305                 case MONO_TYPE_ARRAY:
306                 case MONO_TYPE_STRING:
307                         cinfo->ret.storage = ArgInIReg;
308                         cinfo->ret.reg = X86_EAX;
309                         break;
310                 case MONO_TYPE_U8:
311                 case MONO_TYPE_I8:
312                         cinfo->ret.storage = ArgInIReg;
313                         cinfo->ret.reg = X86_EAX;
314                         break;
315                 case MONO_TYPE_R4:
316                         cinfo->ret.storage = ArgOnFloatFpStack;
317                         break;
318                 case MONO_TYPE_R8:
319                         cinfo->ret.storage = ArgOnDoubleFpStack;
320                         break;
321                 case MONO_TYPE_GENERICINST:
322                         if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
323                                 cinfo->ret.storage = ArgInIReg;
324                                 cinfo->ret.reg = X86_EAX;
325                                 break;
326                         }
327                         /* Fall through */
328                 case MONO_TYPE_VALUETYPE: {
329                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
330
331                         add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
332                         if (cinfo->ret.storage == ArgOnStack)
333                                 /* The caller passes the address where the value is stored */
334                                 add_general (&gr, &stack_size, &cinfo->ret);
335                         break;
336                 }
337                 case MONO_TYPE_TYPEDBYREF:
338                         /* Same as a valuetype with size 24 */
339                         add_general (&gr, &stack_size, &cinfo->ret);
340                         ;
341                         break;
342                 case MONO_TYPE_VOID:
343                         cinfo->ret.storage = ArgNone;
344                         break;
345                 default:
346                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
347                 }
348         }
349
350         /* this */
351         if (sig->hasthis)
352                 add_general (&gr, &stack_size, cinfo->args + 0);
353
354         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
355                 gr = PARAM_REGS;
356                 fr = FLOAT_PARAM_REGS;
357                 
358                 /* Emit the signature cookie just before the implicit arguments */
359                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
360         }
361
362         for (i = 0; i < sig->param_count; ++i) {
363                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
364                 MonoType *ptype;
365
366                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
367                         /* We allways pass the sig cookie on the stack for simplicity */
368                         /* 
369                          * Prevent implicit arguments + the sig cookie from being passed 
370                          * in registers.
371                          */
372                         gr = PARAM_REGS;
373                         fr = FLOAT_PARAM_REGS;
374
375                         /* Emit the signature cookie just before the implicit arguments */
376                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
377                 }
378
379                 if (sig->params [i]->byref) {
380                         add_general (&gr, &stack_size, ainfo);
381                         continue;
382                 }
383                 ptype = mono_type_get_underlying_type (sig->params [i]);
384                 ptype = mini_get_basic_type_from_generic (gsctx, ptype);
385                 switch (ptype->type) {
386                 case MONO_TYPE_BOOLEAN:
387                 case MONO_TYPE_I1:
388                 case MONO_TYPE_U1:
389                         add_general (&gr, &stack_size, ainfo);
390                         break;
391                 case MONO_TYPE_I2:
392                 case MONO_TYPE_U2:
393                 case MONO_TYPE_CHAR:
394                         add_general (&gr, &stack_size, ainfo);
395                         break;
396                 case MONO_TYPE_I4:
397                 case MONO_TYPE_U4:
398                         add_general (&gr, &stack_size, ainfo);
399                         break;
400                 case MONO_TYPE_I:
401                 case MONO_TYPE_U:
402                 case MONO_TYPE_PTR:
403                 case MONO_TYPE_FNPTR:
404                 case MONO_TYPE_CLASS:
405                 case MONO_TYPE_OBJECT:
406                 case MONO_TYPE_STRING:
407                 case MONO_TYPE_SZARRAY:
408                 case MONO_TYPE_ARRAY:
409                         add_general (&gr, &stack_size, ainfo);
410                         break;
411                 case MONO_TYPE_GENERICINST:
412                         if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
413                                 add_general (&gr, &stack_size, ainfo);
414                                 break;
415                         }
416                         /* Fall through */
417                 case MONO_TYPE_VALUETYPE:
418                         add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
419                         break;
420                 case MONO_TYPE_TYPEDBYREF:
421                         stack_size += sizeof (MonoTypedRef);
422                         ainfo->storage = ArgOnStack;
423                         break;
424                 case MONO_TYPE_U8:
425                 case MONO_TYPE_I8:
426                         add_general_pair (&gr, &stack_size, ainfo);
427                         break;
428                 case MONO_TYPE_R4:
429                         add_float (&fr, &stack_size, ainfo, FALSE);
430                         break;
431                 case MONO_TYPE_R8:
432                         add_float (&fr, &stack_size, ainfo, TRUE);
433                         break;
434                 default:
435                         g_error ("unexpected type 0x%x", ptype->type);
436                         g_assert_not_reached ();
437                 }
438         }
439
440         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
441                 gr = PARAM_REGS;
442                 fr = FLOAT_PARAM_REGS;
443                 
444                 /* Emit the signature cookie just before the implicit arguments */
445                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
446         }
447
448 #if defined(__APPLE__)
449         if ((stack_size % 16) != 0) { 
450                 cinfo->need_stack_align = TRUE;
451                 stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
452         }
453 #endif
454
455         cinfo->stack_usage = stack_size;
456         cinfo->reg_usage = gr;
457         cinfo->freg_usage = fr;
458         return cinfo;
459 }
460
461 /*
462  * mono_arch_get_argument_info:
463  * @csig:  a method signature
464  * @param_count: the number of parameters to consider
465  * @arg_info: an array to store the result infos
466  *
467  * Gathers information on parameters such as size, alignment and
468  * padding. arg_info should be large enought to hold param_count + 1 entries. 
469  *
470  * Returns the size of the activation frame.
471  */
472 int
473 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
474 {
475         int k, frame_size = 0;
476         int size, pad;
477         guint32 align;
478         int offset = 8;
479         CallInfo *cinfo;
480
481         cinfo = get_call_info (NULL, NULL, csig, FALSE);
482
483         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
484                 frame_size += sizeof (gpointer);
485                 offset += 4;
486         }
487
488         arg_info [0].offset = offset;
489
490         if (csig->hasthis) {
491                 frame_size += sizeof (gpointer);
492                 offset += 4;
493         }
494
495         arg_info [0].size = frame_size;
496
497         for (k = 0; k < param_count; k++) {
498                 
499                 if (csig->pinvoke)
500                         size = mono_type_native_stack_size (csig->params [k], &align);
501                 else {
502                         int ialign;
503                         size = mini_type_stack_size (NULL, csig->params [k], &ialign);
504                         align = ialign;
505                 }
506
507                 /* ignore alignment for now */
508                 align = 1;
509
510                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
511                 arg_info [k].pad = pad;
512                 frame_size += size;
513                 arg_info [k + 1].pad = 0;
514                 arg_info [k + 1].size = size;
515                 offset += pad;
516                 arg_info [k + 1].offset = offset;
517                 offset += size;
518         }
519
520         align = MONO_ARCH_FRAME_ALIGNMENT;
521         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
522         arg_info [k].pad = pad;
523
524         g_free (cinfo);
525
526         return frame_size;
527 }
528
/*
 * Machine code of a small function with the CpuidFunc signature below.
 * It loads its first argument into EAX, executes CPUID and stores the
 * resulting EAX, EBX, ECX and EDX through the four pointer arguments
 * (found at 0xc, 0x10, 0x14 and 0x18 from EBP). EBX is saved and restored
 * around the sequence. cpuid () copies these bytes into memory obtained
 * from the code manager before calling them.
 */
529 static const guchar cpuid_impl [] = {
530         0x55,                           /* push   %ebp */
531         0x89, 0xe5,                     /* mov    %esp,%ebp */
532         0x53,                           /* push   %ebx */
533         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
534         0x0f, 0xa2,                     /* cpuid   */
535         0x50,                           /* push   %eax */
536         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
537         0x89, 0x18,                     /* mov    %ebx,(%eax) */
538         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
539         0x89, 0x08,                     /* mov    %ecx,(%eax) */
540         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
541         0x89, 0x10,                     /* mov    %edx,(%eax) */
542         0x58,                           /* pop    %eax */
543         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
544         0x89, 0x02,                     /* mov    %eax,(%edx) */
545         0x5b,                           /* pop    %ebx */
546         0xc9,                           /* leave   */
547         0xc3,                           /* ret     */
548 };
549
/* Signature used to call the copied cpuid_impl code */
550 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
551
/*
 * cpuid:
 * @id: the value placed in EAX before executing CPUID
 * @p_eax, @p_ebx, @p_ecx, @p_edx: receive the four result registers
 *
 *   First checks whether bit 0x200000 of EFLAGS can be toggled, which is
 * how the availability of the CPUID instruction is detected. If it can,
 * the cpuid_impl code is copied (once) into memory reserved from the
 * global code manager and called.
 *
 * Returns 1 if CPUID was executed and the outputs were written, 0 if the
 * instruction is not available (the outputs are left untouched).
 *
 * NOTE(review): the lazy initialization of the static `func` pointer is not
 * protected by a lock, and the result of mono_global_codeman_reserve () is
 * not checked.
 */
552 static int 
553 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
554 {
555         int have_cpuid = 0;
556 #ifndef _MSC_VER
        /* Flip bit 0x200000 in a copy of EFLAGS, write it back, re-read it and keep only the bit if it changed */
557         __asm__  __volatile__ (
558                 "pushfl\n"
559                 "popl %%eax\n"
560                 "movl %%eax, %%edx\n"
561                 "xorl $0x200000, %%eax\n"
562                 "pushl %%eax\n"
563                 "popfl\n"
564                 "pushfl\n"
565                 "popl %%eax\n"
566                 "xorl %%edx, %%eax\n"
567                 "andl $0x200000, %%eax\n"
568                 "movl %%eax, %0"
569                 : "=r" (have_cpuid)
570                 :
571                 : "%eax", "%edx"
572         );
573 #else
        /* Same sequence in MSVC inline assembler syntax */
574         __asm {
575                 pushfd
576                 pop eax
577                 mov edx, eax
578                 xor eax, 0x200000
579                 push eax
580                 popfd
581                 pushfd
582                 pop eax
583                 xor eax, edx
584                 and eax, 0x200000
585                 mov have_cpuid, eax
586         }
587 #endif
588         if (have_cpuid) {
589                 /* Have to use the code manager to get around WinXP DEP */
                /* The copy is made on the first call only and reused afterwards */
590                 static CpuidFunc func = NULL;
591                 void *ptr;
592                 if (!func) {
593                         ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
594                         memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
595                         func = (CpuidFunc)ptr;
596                 }
597                 func (id, p_eax, p_ebx, p_ecx, p_edx);
598
599                 /*
600                  * We use this approach because of issues with gcc and pic code, see:
601                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
602                 __asm__ __volatile__ ("cpuid"
603                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
604                         : "a" (id));
605                 */
606                 return 1;
607         }
608         return 0;
609 }
610
611 /*
612  * Initialize the cpu to execute managed code.
613  */
/*
 * Switches the floating point unit to double precision. The non-MSVC path
 * reads the FPU control word, replaces its precision control field with
 * X86_FPCW_PREC_DOUBLE and loads it back; the MSVC path asks the C runtime
 * for the same thing through _control87 ().
 */
614 void
615 mono_arch_cpu_init (void)
616 {
617         /* spec compliance requires running with double precision */
618 #ifndef _MSC_VER
619         guint16 fpcw;
620
        /* store the current control word, patch the precision field, load it */
621         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
622         fpcw &= ~X86_FPCW_PRECC_MASK;
623         fpcw |= X86_FPCW_PREC_DOUBLE;
624         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
        /* the control word is read back once more; the value is not used afterwards */
625         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
626 #else
627         _control87 (_PC_53, MCW_PC);
628 #endif
629 }
630
631 /*
632  * Initialize architecture specific code.
633  */
/* Creates mini_arch_mutex, the lock behind mono_mini_arch_lock ()/mono_mini_arch_unlock (). */
634 void
635 mono_arch_init (void)
636 {
637         InitializeCriticalSection (&mini_arch_mutex);
638 }
639
640 /*
641  * Cleanup architecture specific code.
642  */
/* Destroys mini_arch_mutex, the lock created by mono_arch_init (). */
643 void
644 mono_arch_cleanup (void)
645 {
646         DeleteCriticalSection (&mini_arch_mutex);
647 }
648
649 /*
650  * This function returns the optimizations supported on this cpu.
651  */
652 guint32
653 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
654 {
655         int eax, ebx, ecx, edx;
656         guint32 opts = 0;
657         
658         *exclude_mask = 0;
659         /* Feature Flags function, flags returned in EDX. */
660         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
661                 if (edx & (1 << 15)) {
662                         opts |= MONO_OPT_CMOV;
663                         if (edx & 1)
664                                 opts |= MONO_OPT_FCMOV;
665                         else
666                                 *exclude_mask |= MONO_OPT_FCMOV;
667                 } else
668                         *exclude_mask |= MONO_OPT_CMOV;
669                 if (edx & (1 << 26))
670                         opts |= MONO_OPT_SSE2;
671                 else
672                         *exclude_mask |= MONO_OPT_SSE2;
673         }
674         return opts;
675 }
676
677 /*
678  * Determine whenever the trap whose info is in SIGINFO is caused by
679  * integer overflow.
680  */
681 gboolean
682 mono_arch_is_int_overflow (void *sigctx, void *info)
683 {
684         MonoContext ctx;
685         guint8* ip;
686
687         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
688
689         ip = (guint8*)ctx.eip;
690
691         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
692                 gint32 reg;
693
694                 /* idiv REG */
695                 switch (x86_modrm_rm (ip [1])) {
696                 case X86_EAX:
697                         reg = ctx.eax;
698                         break;
699                 case X86_ECX:
700                         reg = ctx.ecx;
701                         break;
702                 case X86_EDX:
703                         reg = ctx.edx;
704                         break;
705                 case X86_EBX:
706                         reg = ctx.ebx;
707                         break;
708                 case X86_ESI:
709                         reg = ctx.esi;
710                         break;
711                 case X86_EDI:
712                         reg = ctx.edi;
713                         break;
714                 default:
715                         g_assert_not_reached ();
716                         reg = -1;
717                 }
718
719                 if (reg == -1)
720                         return TRUE;
721         }
722                         
723         return FALSE;
724 }
725
726 GList *
727 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
728 {
729         GList *vars = NULL;
730         int i;
731
732         for (i = 0; i < cfg->num_varinfo; i++) {
733                 MonoInst *ins = cfg->varinfo [i];
734                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
735
736                 /* unused vars */
737                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
738                         continue;
739
740                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
741                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
742                         continue;
743
744                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
745                  * 8bit quantities in caller saved registers on x86 */
746                 if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
747                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
748                         g_assert (i == vmv->idx);
749                         vars = g_list_prepend (vars, vmv);
750                 }
751         }
752
753         vars = mono_varlist_sort (cfg, vars, 0);
754
755         return vars;
756 }
757
758 GList *
759 mono_arch_get_global_int_regs (MonoCompile *cfg)
760 {
761         GList *regs = NULL;
762
763         /* we can use 3 registers for global allocation */
764         regs = g_list_prepend (regs, (gpointer)X86_EBX);
765         regs = g_list_prepend (regs, (gpointer)X86_ESI);
766         regs = g_list_prepend (regs, (gpointer)X86_EDI);
767
768         return regs;
769 }
770
771 /*
772  * mono_arch_regalloc_cost:
773  *
774  *  Return the cost, in number of memory references, of the action of 
775  * allocating the variable VMV into a register during global register
776  * allocation.
777  */
778 guint32
779 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
780 {
781         MonoInst *ins = cfg->varinfo [vmv->idx];
782
783         if (cfg->method->save_lmf)
784                 /* The register is already saved */
785                 return (ins->opcode == OP_ARG) ? 1 : 0;
786         else
787                 /* push+pop+possible load if it is an argument */
788                 return (ins->opcode == OP_ARG) ? 3 : 2;
789 }
790  
791 /*
792  * Set var information according to the calling convention. X86 version.
793  * The locals var stuff should most likely be split in another method.
794  */
795 void
796 mono_arch_allocate_vars (MonoCompile *cfg)
797 {
798         MonoMethodSignature *sig;
799         MonoMethodHeader *header;
800         MonoInst *inst;
801         guint32 locals_stack_size, locals_stack_align;
802         int i, offset;
803         gint32 *offsets;
804         CallInfo *cinfo;
805
806         header = mono_method_get_header (cfg->method);
807         sig = mono_method_signature (cfg->method);
808
809         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
810
811         cfg->frame_reg = MONO_ARCH_BASEREG;
812         offset = 0;
813
814         /* Reserve space to save LMF and caller saved registers */
815
816         if (cfg->method->save_lmf) {
817                 offset += sizeof (MonoLMF);
818         } else {
819                 if (cfg->used_int_regs & (1 << X86_EBX)) {
820                         offset += 4;
821                 }
822
823                 if (cfg->used_int_regs & (1 << X86_EDI)) {
824                         offset += 4;
825                 }
826
827                 if (cfg->used_int_regs & (1 << X86_ESI)) {
828                         offset += 4;
829                 }
830         }
831
832         switch (cinfo->ret.storage) {
833         case ArgValuetypeInReg:
834                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
835                 offset += 8;
836                 cfg->ret->opcode = OP_REGOFFSET;
837                 cfg->ret->inst_basereg = X86_EBP;
838                 cfg->ret->inst_offset = - offset;
839                 break;
840         default:
841                 break;
842         }
843
844         /* Allocate locals */
845         offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
846         if (locals_stack_align) {
847                 offset += (locals_stack_align - 1);
848                 offset &= ~(locals_stack_align - 1);
849         }
850         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
851                 if (offsets [i] != -1) {
852                         MonoInst *inst = cfg->varinfo [i];
853                         inst->opcode = OP_REGOFFSET;
854                         inst->inst_basereg = X86_EBP;
855                         inst->inst_offset = - (offset + offsets [i]);
856                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
857                 }
858         }
859         offset += locals_stack_size;
860
861
862         /*
863          * Allocate arguments+return value
864          */
865
866         switch (cinfo->ret.storage) {
867         case ArgOnStack:
868                 cfg->ret->opcode = OP_REGOFFSET;
869                 cfg->ret->inst_basereg = X86_EBP;
870                 cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
871                 break;
872         case ArgValuetypeInReg:
873                 break;
874         case ArgInIReg:
875                 cfg->ret->opcode = OP_REGVAR;
876                 cfg->ret->inst_c0 = cinfo->ret.reg;
877                 break;
878         case ArgNone:
879         case ArgOnFloatFpStack:
880         case ArgOnDoubleFpStack:
881                 break;
882         default:
883                 g_assert_not_reached ();
884         }
885
886         if (sig->call_convention == MONO_CALL_VARARG) {
887                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
888                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
889         }
890
891         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
892                 ArgInfo *ainfo = &cinfo->args [i];
893                 inst = cfg->args [i];
894                 if (inst->opcode != OP_REGVAR) {
895                         inst->opcode = OP_REGOFFSET;
896                         inst->inst_basereg = X86_EBP;
897                 }
898                 inst->inst_offset = ainfo->offset + ARGS_OFFSET;
899         }
900
901         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
902         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
903
904         cfg->stack_offset = offset;
905 }
906
907 void
908 mono_arch_create_vars (MonoCompile *cfg)
909 {
910         MonoMethodSignature *sig;
911         CallInfo *cinfo;
912
913         sig = mono_method_signature (cfg->method);
914
915         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
916
917         if (cinfo->ret.storage == ArgValuetypeInReg)
918                 cfg->ret_var_is_local = TRUE;
919 }
920
/* FIXME: we need an alignment solution for enter_method and mono_arch_call_opcode;
 * currently the alignment in mono_arch_call_opcode is computed without using arch_get_argument_info.
 */
924
925 static void
926 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
927 {
928         MonoInst *arg;
929         MonoMethodSignature *tmp_sig;
930         MonoInst *sig_arg;
931
932         /* FIXME: Add support for signature tokens to AOT */
933         cfg->disable_aot = TRUE;
934         MONO_INST_NEW (cfg, arg, OP_OUTARG);
935
936         /*
937          * mono_ArgIterator_Setup assumes the signature cookie is 
938          * passed first and all the arguments which were before it are
939          * passed on the stack after the signature. So compensate by 
940          * passing a different signature.
941          */
942         tmp_sig = mono_metadata_signature_dup (call->signature);
943         tmp_sig->param_count -= call->signature->sentinelpos;
944         tmp_sig->sentinelpos = 0;
945         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
946
947         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
948         sig_arg->inst_p0 = tmp_sig;
949
950         arg->inst_left = sig_arg;
951         arg->type = STACK_PTR;
952         MONO_INST_LIST_ADD (&arg->node, &call->out_args);
953 }
954
955 /*
956  * It is expensive to adjust esp for each individual fp argument pushed on the stack
957  * so we try to do it just once when we have multiple fp arguments in a row.
958  * We don't use this mechanism generally because for int arguments the generated code
959  * is slightly bigger and new generation cpus optimize away the dependency chains
960  * created by push instructions on the esp value.
961  * fp_arg_setup is the first argument in the execution sequence where the esp register
962  * is modified.
963  */
964 static int
965 collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
966 {
967         int fp_space = 0;
968         MonoType *t;
969
970         for (; start_arg < sig->param_count; ++start_arg) {
971                 t = mono_type_get_underlying_type (sig->params [start_arg]);
972                 if (!t->byref && t->type == MONO_TYPE_R8) {
973                         fp_space += sizeof (double);
974                         *fp_arg_setup = start_arg;
975                 } else {
976                         break;
977                 }
978         }
979         return fp_space;
980 }
981
982 /* 
983  * take the arguments and generate the arch-specific
984  * instructions to properly call the function in call.
985  * This includes pushing, moving arguments to the right register
986  * etc.
987  */
988 MonoCallInst*
989 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
990         MonoInst *arg, *in;
991         MonoMethodSignature *sig;
992         int i, n;
993         CallInfo *cinfo;
994         int sentinelpos = 0;
995         int fp_args_space = 0, fp_args_offset = 0, fp_arg_setup = -1;
996
997         sig = call->signature;
998         n = sig->param_count + sig->hasthis;
999
1000         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
1001
1002         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
1003                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
1004
1005         for (i = 0; i < n; ++i) {
1006                 ArgInfo *ainfo = cinfo->args + i;
1007
1008                 /* Emit the signature cookie just before the implicit arguments */
1009                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
1010                         emit_sig_cookie (cfg, call);
1011                 }
1012
1013                 if (is_virtual && i == 0) {
1014                         /* the argument will be attached to the call instrucion */
1015                         in = call->args [i];
1016                 } else {
1017                         MonoType *t;
1018
1019                         if (i >= sig->hasthis)
1020                                 t = sig->params [i - sig->hasthis];
1021                         else
1022                                 t = &mono_defaults.int_class->byval_arg;
1023                         t = mono_type_get_underlying_type (t);
1024
1025                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1026                         in = call->args [i];
1027                         arg->cil_code = in->cil_code;
1028                         arg->inst_left = in;
1029                         arg->type = in->type;
1030                         MONO_INST_LIST_ADD (&arg->node, &call->out_args);
1031
1032                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
1033                                 guint32 size, align;
1034
1035                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
1036                                         size = sizeof (MonoTypedRef);
1037                                         align = sizeof (gpointer);
1038                                 }
1039                                 else
1040                                         if (sig->pinvoke)
1041                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
1042                                         else {
1043                                                 int ialign;
1044                                                 size = mini_type_stack_size (cfg->generic_sharing_context, &in->klass->byval_arg, &ialign);
1045                                                 align = ialign;
1046                                         }
1047                                 arg->opcode = OP_OUTARG_VT;
1048                                 arg->klass = in->klass;
1049                                 arg->backend.is_pinvoke = sig->pinvoke;
1050                                 arg->inst_imm = size; 
1051                         }
1052                         else {
1053                                 switch (ainfo->storage) {
1054                                 case ArgOnStack:
1055                                         arg->opcode = OP_OUTARG;
1056                                         if (!t->byref) {
1057                                                 if (t->type == MONO_TYPE_R4) {
1058                                                         arg->opcode = OP_OUTARG_R4;
1059                                                 } else if (t->type == MONO_TYPE_R8) {
1060                                                         arg->opcode = OP_OUTARG_R8;
1061                                                         /* we store in the upper bits of backen.arg_info the needed
1062                                                          * esp adjustment and in the lower bits the offset from esp
1063                                                          * where the arg needs to be stored
1064                                                          */
1065                                                         if (!fp_args_space) {
1066                                                                 fp_args_space = collect_fp_stack_space (sig, i - sig->hasthis, &fp_arg_setup);
1067                                                                 fp_args_offset = fp_args_space;
1068                                                         }
1069                                                         arg->backend.arg_info = fp_args_space - fp_args_offset;
1070                                                         fp_args_offset -= sizeof (double);
1071                                                         if (i - sig->hasthis == fp_arg_setup) {
1072                                                                 arg->backend.arg_info |= fp_args_space << 16;
1073                                                         }
1074                                                         if (fp_args_offset == 0) {
1075                                                                 /* the allocated esp stack is finished:
1076                                                                  * prepare for an eventual second run of fp args
1077                                                                  */
1078                                                                 fp_args_space = 0;
1079                                                         }
1080                                                 }
1081                                         }
1082                                         break;
1083                                 default:
1084                                         g_assert_not_reached ();
1085                                 }
1086                         }
1087                 }
1088         }
1089
1090         /* Handle the case where there are no implicit arguments */
1091         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
1092                 emit_sig_cookie (cfg, call);
1093         }
1094
1095         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1096                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1097                         MonoInst *zero_inst;
1098                         /*
1099                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1100                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1101                          * before calling the function. So we add a dummy instruction to represent pushing the 
1102                          * struct return address to the stack. The return address will be saved to this stack slot 
1103                          * by the code emitted in this_vret_args.
1104                          */
1105                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1106                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1107                         zero_inst->inst_p0 = 0;
1108                         arg->inst_left = zero_inst;
1109                         arg->type = STACK_PTR;
1110                         MONO_INST_LIST_ADD (&arg->node, &call->out_args);
1111                 } else {
1112                         /* if the function returns a struct, the called method already does a ret $0x4 */
1113                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1114                                 cinfo->stack_usage -= 4;
1115                 }
1116         }
1117         
1118         call->stack_usage = cinfo->stack_usage;
1119
1120 #if defined(__APPLE__)
1121         if (cinfo->need_stack_align) {
1122                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1123                 arg->inst_c0 = cinfo->stack_align_amount;
1124                 MONO_INST_LIST_ADD (&arg->node, &call->out_args);
1125         }
1126 #endif 
1127
1128         return call;
1129 }
1130
1131 /*
1132  * Allow tracing to work with this interface (with an optional argument)
1133  */
1134 void*
1135 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1136 {
1137         guchar *code = p;
1138
1139 #if __APPLE__
1140         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1141 #endif
1142
1143         /* if some args are passed in registers, we need to save them here */
1144         x86_push_reg (code, X86_EBP);
1145
1146         if (cfg->compile_aot) {
1147                 x86_push_imm (code, cfg->method);
1148                 x86_mov_reg_imm (code, X86_EAX, func);
1149                 x86_call_reg (code, X86_EAX);
1150         } else {
1151                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1152                 x86_push_imm (code, cfg->method);
1153                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1154                 x86_call_code (code, 0);
1155         }
1156 #if __APPLE__
1157         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
1158 #else
1159         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1160 #endif
1161
1162         return code;
1163 }
1164
/*
 * How mono_arch_instrument_epilog preserves the return value of the
 * instrumented method around the call it emits.
 */
enum {
	SAVE_NONE = 0,		/* nothing is saved */
	SAVE_STRUCT = 1,	/* nothing is saved; the word at EBP+8 may be passed as argument */
	SAVE_EAX = 2,		/* EAX is pushed before and popped after the call */
	SAVE_EAX_EDX = 3,	/* the EDX:EAX pair is pushed before and popped after the call */
	SAVE_FP = 4		/* the fp stack top is spilled to the stack and reloaded */
};
1172
1173 void*
1174 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1175 {
1176         guchar *code = p;
1177         int arg_size = 0, save_mode = SAVE_NONE;
1178         MonoMethod *method = cfg->method;
1179         
1180         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1181         case MONO_TYPE_VOID:
1182                 /* special case string .ctor icall */
1183                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1184                         save_mode = SAVE_EAX;
1185                 else
1186                         save_mode = SAVE_NONE;
1187                 break;
1188         case MONO_TYPE_I8:
1189         case MONO_TYPE_U8:
1190                 save_mode = SAVE_EAX_EDX;
1191                 break;
1192         case MONO_TYPE_R4:
1193         case MONO_TYPE_R8:
1194                 save_mode = SAVE_FP;
1195                 break;
1196         case MONO_TYPE_GENERICINST:
1197                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1198                         save_mode = SAVE_EAX;
1199                         break;
1200                 }
1201                 /* Fall through */
1202         case MONO_TYPE_VALUETYPE:
1203                 save_mode = SAVE_STRUCT;
1204                 break;
1205         default:
1206                 save_mode = SAVE_EAX;
1207                 break;
1208         }
1209
1210         switch (save_mode) {
1211         case SAVE_EAX_EDX:
1212                 x86_push_reg (code, X86_EDX);
1213                 x86_push_reg (code, X86_EAX);
1214                 if (enable_arguments) {
1215                         x86_push_reg (code, X86_EDX);
1216                         x86_push_reg (code, X86_EAX);
1217                         arg_size = 8;
1218                 }
1219                 break;
1220         case SAVE_EAX:
1221                 x86_push_reg (code, X86_EAX);
1222                 if (enable_arguments) {
1223                         x86_push_reg (code, X86_EAX);
1224                         arg_size = 4;
1225                 }
1226                 break;
1227         case SAVE_FP:
1228                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1229                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1230                 if (enable_arguments) {
1231                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1232                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1233                         arg_size = 8;
1234                 }
1235                 break;
1236         case SAVE_STRUCT:
1237                 if (enable_arguments) {
1238                         x86_push_membase (code, X86_EBP, 8);
1239                         arg_size = 4;
1240                 }
1241                 break;
1242         case SAVE_NONE:
1243         default:
1244                 break;
1245         }
1246
1247         if (cfg->compile_aot) {
1248                 x86_push_imm (code, method);
1249                 x86_mov_reg_imm (code, X86_EAX, func);
1250                 x86_call_reg (code, X86_EAX);
1251         } else {
1252                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1253                 x86_push_imm (code, method);
1254                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1255                 x86_call_code (code, 0);
1256         }
1257         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1258
1259         switch (save_mode) {
1260         case SAVE_EAX_EDX:
1261                 x86_pop_reg (code, X86_EAX);
1262                 x86_pop_reg (code, X86_EDX);
1263                 break;
1264         case SAVE_EAX:
1265                 x86_pop_reg (code, X86_EAX);
1266                 break;
1267         case SAVE_FP:
1268                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1269                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1270                 break;
1271         case SAVE_NONE:
1272         default:
1273                 break;
1274         }
1275
1276         return code;
1277 }
1278
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch to the target of INS, which is either a label
 * (MONO_INST_BRLABEL set, target in inst_i0) or a basic block (inst_true_bb).
 * When the native offset of the target is already known (non-zero), the
 * branch is emitted directly. Otherwise a patch is recorded and a placeholder
 * branch is emitted: the 8 bit form if MONO_OPT_BRANCH is enabled and the
 * estimated distance fits in an imm8, the 32 bit form in all other cases.
 *
 *   Expects `cfg', `code' and `cpos' to be in scope at the expansion site.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if ((ins)->flags & MONO_INST_BRLABEL) { \
	if (!(ins)->inst_i0->inst_c0) { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, (ins)->inst_i0); \
		if (!(cfg->opt & MONO_OPT_BRANCH) || \
		    !x86_is_imm8 ((ins)->inst_i0->inst_c1 - cpos)) \
			x86_branch32 (code, cond, 0, sign); \
		else \
			x86_branch8 (code, cond, 0, sign); \
	} else { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_i0->inst_c0, sign); \
	} \
} else { \
	if (!(ins)->inst_true_bb->native_offset) { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, (ins)->inst_true_bb); \
		if (!(cfg->opt & MONO_OPT_BRANCH) || \
		    !x86_is_imm8 ((ins)->inst_true_bb->max_offset - cpos)) \
			x86_branch32 (code, cond, 0, sign); \
		else \
			x86_branch8 (code, cond, 0, sign); \
	} else { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_true_bb->native_offset, sign); \
	} \
}
1303
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit a conditional branch which raises the exception EXC_NAME when COND
 * holds. If mono_branch_optimize_exception_target () finds a target for the
 * exception, branch to it directly; otherwise record a MONO_PATCH_INFO_EXC
 * patch and emit a 32 bit placeholder branch.
 *
 *   Expects `cfg', `bb', `code' and `cpos' to be in scope at the expansion
 * site. The expansion keeps its historical trailing semicolon.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,sign,exc_name)              \
	do {                                                        \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins != NULL) {                                 \
			EMIT_COND_BRANCH (tins, cond, sign);        \
		} else {                                            \
			mono_add_patch_info (cfg, code - cfg->native_code,   \
					MONO_PATCH_INFO_EXC, exc_name);  \
			x86_branch32 (code, cond, 0, sign);         \
		}                                                   \
	} while (0);
1319
/*
 * EMIT_FPCOMPARE:
 *
 *   Emit x86_fcompp followed by x86_fnstsw at CODE.
 * The expansion keeps its historical trailing semicolon.
 */
#define EMIT_FPCOMPARE(code) \
	do { \
		x86_fcompp (code); \
		x86_fnstsw (code); \
	} while (0);
1324
1325
1326 static guint8*
1327 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1328 {
1329         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1330         x86_call_code (code, 0);
1331
1332         return code;
1333 }
1334
/*
 * True unless OPCODE is one of the add-with-carry / subtract-with-borrow
 * opcodes listed below. peephole_pass uses it to decide whether an
 * instruction which changes the condition flags may be placed before OPCODE.
 */
#define INST_IGNORES_CFLAGS(opcode) \
	(((opcode) != OP_ADC) && ((opcode) != OP_IADC) && \
	 ((opcode) != OP_ADC_IMM) && ((opcode) != OP_IADC_IMM) && \
	 ((opcode) != OP_SBB) && ((opcode) != OP_ISBB) && \
	 ((opcode) != OP_SBB_IMM) && ((opcode) != OP_ISBB_IMM))
1336
1337 /*
1338  * peephole_pass_1:
1339  *
1340  *   Perform peephole opts which should/can be performed before local regalloc
1341  */
1342 static void
1343 peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
1344 {
1345         MonoInst *ins, *n;
1346
1347         MONO_INST_LIST_FOR_EACH_ENTRY_SAFE (ins, n, &bb->ins_list, node) {
1348                 MonoInst *last_ins = mono_inst_list_prev (&ins->node, &bb->ins_list);
1349                 switch (ins->opcode) {
1350                 case OP_IADD_IMM:
1351                 case OP_ADD_IMM:
1352                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1353                                 /* 
1354                                  * X86_LEA is like ADD, but doesn't have the
1355                                  * sreg1==dreg restriction.
1356                                  */
1357                                 ins->opcode = OP_X86_LEA_MEMBASE;
1358                                 ins->inst_basereg = ins->sreg1;
1359                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1360                                 ins->opcode = OP_X86_INC_REG;
1361                         break;
1362                 case OP_SUB_IMM:
1363                 case OP_ISUB_IMM:
1364                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1365                                 ins->opcode = OP_X86_LEA_MEMBASE;
1366                                 ins->inst_basereg = ins->sreg1;
1367                                 ins->inst_imm = -ins->inst_imm;
1368                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1369                                 ins->opcode = OP_X86_DEC_REG;
1370                         break;
1371                 case OP_COMPARE_IMM:
1372                 case OP_ICOMPARE_IMM:
1373                         /* OP_COMPARE_IMM (reg, 0) 
1374                          * --> 
1375                          * OP_X86_TEST_NULL (reg) 
1376                          */
1377                         if (!ins->inst_imm)
1378                                 ins->opcode = OP_X86_TEST_NULL;
1379                         break;
1380                 case OP_X86_COMPARE_MEMBASE_IMM:
1381                         /* 
1382                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1383                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1384                          * -->
1385                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1386                          * OP_COMPARE_IMM reg, imm
1387                          *
1388                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1389                          */
1390                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1391                             ins->inst_basereg == last_ins->inst_destbasereg &&
1392                             ins->inst_offset == last_ins->inst_offset) {
1393                                         ins->opcode = OP_COMPARE_IMM;
1394                                         ins->sreg1 = last_ins->sreg1;
1395
1396                                         /* check if we can remove cmp reg,0 with test null */
1397                                         if (!ins->inst_imm)
1398                                                 ins->opcode = OP_X86_TEST_NULL;
1399                                 }
1400
1401                         break;
1402                 case OP_LOAD_MEMBASE:
1403                 case OP_LOADI4_MEMBASE:
1404                         /* 
1405                          * Note: if reg1 = reg2 the load op is removed
1406                          *
1407                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1408                          * OP_LOAD_MEMBASE offset(basereg), reg2
1409                          * -->
1410                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1411                          * OP_MOVE reg1, reg2
1412                          */
1413                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1414                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1415                             ins->inst_basereg == last_ins->inst_destbasereg &&
1416                             ins->inst_offset == last_ins->inst_offset) {
1417                                 if (ins->dreg == last_ins->sreg1) {
1418                                         MONO_DEL_INS (ins);
1419                                         continue;
1420                                 } else {
1421                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1422                                         ins->opcode = OP_MOVE;
1423                                         ins->sreg1 = last_ins->sreg1;
1424                                 }
1425
1426                         /* 
1427                          * Note: reg1 must be different from the basereg in the second load
1428                          * Note: if reg1 = reg2 is equal then second load is removed
1429                          *
1430                          * OP_LOAD_MEMBASE offset(basereg), reg1
1431                          * OP_LOAD_MEMBASE offset(basereg), reg2
1432                          * -->
1433                          * OP_LOAD_MEMBASE offset(basereg), reg1
1434                          * OP_MOVE reg1, reg2
1435                          */
1436                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1437                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1438                               ins->inst_basereg != last_ins->dreg &&
1439                               ins->inst_basereg == last_ins->inst_basereg &&
1440                               ins->inst_offset == last_ins->inst_offset) {
1441
1442                                 if (ins->dreg == last_ins->dreg) {
1443                                         MONO_DEL_INS (ins);
1444                                         continue;
1445                                 } else {
1446                                         ins->opcode = OP_MOVE;
1447                                         ins->sreg1 = last_ins->dreg;
1448                                 }
1449
1450                                 //g_assert_not_reached ();
1451
1452 #if 0
1453                         /* 
1454                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1455                          * OP_LOAD_MEMBASE offset(basereg), reg
1456                          * -->
1457                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1458                          * OP_ICONST reg, imm
1459                          */
1460                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1461                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1462                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1463                                    ins->inst_offset == last_ins->inst_offset) {
1464                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1465                                 ins->opcode = OP_ICONST;
1466                                 ins->inst_c0 = last_ins->inst_imm;
1467                                 g_assert_not_reached (); // check this rule
1468 #endif
1469                         }
1470                         break;
1471                 case OP_LOADU1_MEMBASE:
1472                 case OP_LOADI1_MEMBASE:
1473                         /* 
1474                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1475                          * OP_LOAD_MEMBASE offset(basereg), reg2
1476                          * -->
1477                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1478                          * CONV_I2/U2 reg1, reg2
1479                          */
1480                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1481                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1482                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1483                                         ins->inst_offset == last_ins->inst_offset) {
1484                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1485                                 ins->sreg1 = last_ins->sreg1;
1486                         }
1487                         break;
1488                 case OP_LOADU2_MEMBASE:
1489                 case OP_LOADI2_MEMBASE:
1490                         /* 
1491                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1492                          * OP_LOAD_MEMBASE offset(basereg), reg2
1493                          * -->
1494                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1495                          * CONV_I2/U2 reg1, reg2
1496                          */
1497                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1498                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1499                                         ins->inst_offset == last_ins->inst_offset) {
1500                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1501                                 ins->sreg1 = last_ins->sreg1;
1502                         }
1503                         break;
1504                 case CEE_CONV_I4:
1505                 case CEE_CONV_U4:
1506                 case OP_ICONV_TO_I4:
1507                 case OP_MOVE:
1508                         /*
1509                          * Removes:
1510                          *
1511                          * OP_MOVE reg, reg 
1512                          */
1513                         if (ins->dreg == ins->sreg1) {
1514                                 MONO_DEL_INS (ins);
1515                                 continue;
1516                         }
1517                         /* 
1518                          * Removes:
1519                          *
1520                          * OP_MOVE sreg, dreg 
1521                          * OP_MOVE dreg, sreg
1522                          */
1523                         if (last_ins && last_ins->opcode == OP_MOVE &&
1524                             ins->sreg1 == last_ins->dreg &&
1525                             ins->dreg == last_ins->sreg1) {
1526                                 MONO_DEL_INS (ins);
1527                                 continue;
1528                         }
1529                         break;
1530                         
1531                 case OP_X86_PUSH_MEMBASE:
1532                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1533                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1534                             ins->inst_basereg == last_ins->inst_destbasereg &&
1535                             ins->inst_offset == last_ins->inst_offset) {
1536                                     ins->opcode = OP_X86_PUSH;
1537                                     ins->sreg1 = last_ins->sreg1;
1538                         }
1539                         break;
1540                 }
1541         }
1542 }
1543
1544 static void
1545 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1546 {
1547         MonoInst *ins, *n;
1548
1549         MONO_INST_LIST_FOR_EACH_ENTRY_SAFE (ins, n, &bb->ins_list, node) {
1550                 MonoInst *last_ins = mono_inst_list_prev (&ins->node, &bb->ins_list);
1551
1552                 switch (ins->opcode) {
1553                 case OP_ICONST: {
1554                         MonoInst *next;
1555
1556                         /* reg = 0 -> XOR (reg, reg) */
1557                         /* XOR sets cflags on x86, so we cant do it always */
1558                         next = mono_inst_list_next (&ins->node, &bb->ins_list);
1559                         if (ins->inst_c0 == 0 && (!next ||
1560                                         (next && INST_IGNORES_CFLAGS (next->opcode)))) {
1561                                 MonoInst *ins2;
1562
1563                                 ins->opcode = OP_IXOR;
1564                                 ins->sreg1 = ins->dreg;
1565                                 ins->sreg2 = ins->dreg;
1566
1567                                 /* 
1568                                  * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
1569                                  * since it takes 3 bytes instead of 7.
1570                                  */
1571                                 for (ins2 = mono_inst_list_next (&ins->node, &bb->ins_list); ins2;
1572                                                 ins2 = mono_inst_list_next (&ins2->node, &bb->ins_list)) {
1573                                         if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1574                                                 ins2->opcode = OP_STORE_MEMBASE_REG;
1575                                                 ins2->sreg1 = ins->dreg;
1576                                         } else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1577                                                 ins2->opcode = OP_STOREI4_MEMBASE_REG;
1578                                                 ins2->sreg1 = ins->dreg;
1579                                         } else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
1580                                                 /* Continue iteration */
1581                                         } else
1582                                                 break;
1583                                 }
1584                         }
1585                         break;
1586                 }
1587                 case OP_IADD_IMM:
1588                 case OP_ADD_IMM:
1589                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1590                                 ins->opcode = OP_X86_INC_REG;
1591                         break;
1592                 case OP_ISUB_IMM:
1593                 case OP_SUB_IMM:
1594                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1595                                 ins->opcode = OP_X86_DEC_REG;
1596                         break;
1597                 case OP_X86_COMPARE_MEMBASE_IMM:
1598                         /* 
1599                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1600                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1601                          * -->
1602                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1603                          * OP_COMPARE_IMM reg, imm
1604                          *
1605                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1606                          */
1607                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1608                             ins->inst_basereg == last_ins->inst_destbasereg &&
1609                             ins->inst_offset == last_ins->inst_offset) {
1610                                         ins->opcode = OP_COMPARE_IMM;
1611                                         ins->sreg1 = last_ins->sreg1;
1612
1613                                         /* check if we can remove cmp reg,0 with test null */
1614                                         if (!ins->inst_imm)
1615                                                 ins->opcode = OP_X86_TEST_NULL;
1616                                 }
1617
1618                         break;
1619                 case OP_LOAD_MEMBASE:
1620                 case OP_LOADI4_MEMBASE:
1621                         /* 
1622                          * Note: if reg1 = reg2 the load op is removed
1623                          *
1624                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1625                          * OP_LOAD_MEMBASE offset(basereg), reg2
1626                          * -->
1627                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1628                          * OP_MOVE reg1, reg2
1629                          */
1630                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1631                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1632                             ins->inst_basereg == last_ins->inst_destbasereg &&
1633                             ins->inst_offset == last_ins->inst_offset) {
1634                                 if (ins->dreg == last_ins->sreg1) {
1635                                         MONO_DEL_INS (ins);
1636                                         continue;
1637                                 } else {
1638                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1639                                         ins->opcode = OP_MOVE;
1640                                         ins->sreg1 = last_ins->sreg1;
1641                                 }
1642
1643                         /* 
1644                          * Note: reg1 must be different from the basereg in the second load
1645                          * Note: if reg1 = reg2 is equal then second load is removed
1646                          *
1647                          * OP_LOAD_MEMBASE offset(basereg), reg1
1648                          * OP_LOAD_MEMBASE offset(basereg), reg2
1649                          * -->
1650                          * OP_LOAD_MEMBASE offset(basereg), reg1
1651                          * OP_MOVE reg1, reg2
1652                          */
1653                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1654                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1655                               ins->inst_basereg != last_ins->dreg &&
1656                               ins->inst_basereg == last_ins->inst_basereg &&
1657                               ins->inst_offset == last_ins->inst_offset) {
1658
1659                                 if (ins->dreg == last_ins->dreg) {
1660                                         MONO_DEL_INS (ins);
1661                                         continue;
1662                                 } else {
1663                                         ins->opcode = OP_MOVE;
1664                                         ins->sreg1 = last_ins->dreg;
1665                                 }
1666
1667                                 //g_assert_not_reached ();
1668
1669 #if 0
1670                         /* 
1671                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1672                          * OP_LOAD_MEMBASE offset(basereg), reg
1673                          * -->
1674                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1675                          * OP_ICONST reg, imm
1676                          */
1677                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1678                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1679                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1680                                    ins->inst_offset == last_ins->inst_offset) {
1681                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1682                                 ins->opcode = OP_ICONST;
1683                                 ins->inst_c0 = last_ins->inst_imm;
1684                                 g_assert_not_reached (); // check this rule
1685 #endif
1686                         }
1687                         break;
1688                 case OP_LOADU1_MEMBASE:
1689                 case OP_LOADI1_MEMBASE:
1690                         /* 
1691                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1692                          * OP_LOAD_MEMBASE offset(basereg), reg2
1693                          * -->
1694                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1695                          * CONV_I2/U2 reg1, reg2
1696                          */
1697                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1698                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1699                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1700                                         ins->inst_offset == last_ins->inst_offset) {
1701                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1702                                 ins->sreg1 = last_ins->sreg1;
1703                         }
1704                         break;
1705                 case OP_LOADU2_MEMBASE:
1706                 case OP_LOADI2_MEMBASE:
1707                         /* 
1708                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1709                          * OP_LOAD_MEMBASE offset(basereg), reg2
1710                          * -->
1711                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1712                          * CONV_I2/U2 reg1, reg2
1713                          */
1714                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1715                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1716                                         ins->inst_offset == last_ins->inst_offset) {
1717                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1718                                 ins->sreg1 = last_ins->sreg1;
1719                         }
1720                         break;
1721                 case CEE_CONV_I4:
1722                 case CEE_CONV_U4:
1723                 case OP_ICONV_TO_I4:
1724                 case OP_MOVE:
1725                         /*
1726                          * Removes:
1727                          *
1728                          * OP_MOVE reg, reg 
1729                          */
1730                         if (ins->dreg == ins->sreg1) {
1731                                 MONO_DEL_INS (ins);
1732                                 continue;
1733                         }
1734                         /* 
1735                          * Removes:
1736                          *
1737                          * OP_MOVE sreg, dreg 
1738                          * OP_MOVE dreg, sreg
1739                          */
1740                         if (last_ins && last_ins->opcode == OP_MOVE &&
1741                             ins->sreg1 == last_ins->dreg &&
1742                             ins->dreg == last_ins->sreg1) {
1743                                 MONO_DEL_INS (ins);
1744                                 continue;
1745                         }
1746                         break;
1747                 case OP_X86_PUSH_MEMBASE:
1748                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1749                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1750                             ins->inst_basereg == last_ins->inst_destbasereg &&
1751                             ins->inst_offset == last_ins->inst_offset) {
1752                                     ins->opcode = OP_X86_PUSH;
1753                                     ins->sreg1 = last_ins->sreg1;
1754                         }
1755                         break;
1756                 }
1757         }
1758 }
1759
1760 static const int 
1761 branch_cc_table [] = {
1762         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1763         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1764         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1765 };
1766
1767 /* Maps CMP_... constants to X86_CC_... constants */
1768 static const int
1769 cc_table [] = {
1770         X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
1771         X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
1772 };
1773
1774 static const int
1775 cc_signed_table [] = {
1776         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1777         FALSE, FALSE, FALSE, FALSE
1778 };
1779
1780 void
1781 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1782 {
1783         if (cfg->opt & MONO_OPT_PEEPHOLE)
1784                 peephole_pass_1 (cfg, bb);
1785
1786         mono_local_regalloc (cfg, bb);
1787 }
1788
1789 static unsigned char*
1790 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1791 {
1792 #define XMM_TEMP_REG 0
1793         if (cfg->opt & MONO_OPT_SSE2 && size < 8) {
1794                 /* optimize by assigning a local var for this use so we avoid
1795                  * the stack manipulations */
1796                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1797                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1798                 x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
1799                 x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
1800                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1801                 if (size == 1)
1802                         x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1803                 else if (size == 2)
1804                         x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1805                 return code;
1806         }
1807         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1808         x86_fnstcw_membase(code, X86_ESP, 0);
1809         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1810         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1811         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1812         x86_fldcw_membase (code, X86_ESP, 2);
1813         if (size == 8) {
1814                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1815                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1816                 x86_pop_reg (code, dreg);
1817                 /* FIXME: need the high register 
1818                  * x86_pop_reg (code, dreg_high);
1819                  */
1820         } else {
1821                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1822                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1823                 x86_pop_reg (code, dreg);
1824         }
1825         x86_fldcw_membase (code, X86_ESP, 0);
1826         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1827
1828         if (size == 1)
1829                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1830         else if (size == 2)
1831                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1832         return code;
1833 }
1834
1835 static unsigned char*
1836 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1837 {
1838         int sreg = tree->sreg1;
1839         int need_touch = FALSE;
1840
1841 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1842         need_touch = TRUE;
1843 #endif
1844
1845         if (need_touch) {
1846                 guint8* br[5];
1847
1848                 /*
1849                  * Under Windows:
1850                  * If requested stack size is larger than one page,
1851                  * perform stack-touch operation
1852                  */
1853                 /*
1854                  * Generate stack probe code.
1855                  * Under Windows, it is necessary to allocate one page at a time,
1856                  * "touching" stack after each successful sub-allocation. This is
1857                  * because of the way stack growth is implemented - there is a
1858                  * guard page before the lowest stack page that is currently commited.
1859                  * Stack normally grows sequentially so OS traps access to the
1860                  * guard page and commits more pages when needed.
1861                  */
1862                 x86_test_reg_imm (code, sreg, ~0xFFF);
1863                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1864
1865                 br[2] = code; /* loop */
1866                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1867                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1868
1869                 /* 
1870                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
1871                  * that follows only initializes the last part of the area.
1872                  */
1873                 /* Same as the init code below with size==0x1000 */
1874                 if (tree->flags & MONO_INST_INIT) {
1875                         x86_push_reg (code, X86_EAX);
1876                         x86_push_reg (code, X86_ECX);
1877                         x86_push_reg (code, X86_EDI);
1878                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
1879                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
1880                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
1881                         x86_cld (code);
1882                         x86_prefix (code, X86_REP_PREFIX);
1883                         x86_stosl (code);
1884                         x86_pop_reg (code, X86_EDI);
1885                         x86_pop_reg (code, X86_ECX);
1886                         x86_pop_reg (code, X86_EAX);
1887                 }
1888
1889                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1890                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1891                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1892                 x86_patch (br[3], br[2]);
1893                 x86_test_reg_reg (code, sreg, sreg);
1894                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1895                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1896
1897                 br[1] = code; x86_jump8 (code, 0);
1898
1899                 x86_patch (br[0], code);
1900                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1901                 x86_patch (br[1], code);
1902                 x86_patch (br[4], code);
1903         }
1904         else
1905                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1906
1907         if (tree->flags & MONO_INST_INIT) {
1908                 int offset = 0;
1909                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1910                         x86_push_reg (code, X86_EAX);
1911                         offset += 4;
1912                 }
1913                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1914                         x86_push_reg (code, X86_ECX);
1915                         offset += 4;
1916                 }
1917                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1918                         x86_push_reg (code, X86_EDI);
1919                         offset += 4;
1920                 }
1921                 
1922                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1923                 if (sreg != X86_ECX)
1924                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1925                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1926                                 
1927                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1928                 x86_cld (code);
1929                 x86_prefix (code, X86_REP_PREFIX);
1930                 x86_stosl (code);
1931                 
1932                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1933                         x86_pop_reg (code, X86_EDI);
1934                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1935                         x86_pop_reg (code, X86_ECX);
1936                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1937                         x86_pop_reg (code, X86_EAX);
1938         }
1939         return code;
1940 }
1941
1942
1943 static guint8*
1944 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1945 {
1946         CallInfo *cinfo;
1947         int quad;
1948
1949         /* Move return value to the target register */
1950         switch (ins->opcode) {
1951         case CEE_CALL:
1952         case OP_CALL_REG:
1953         case OP_CALL_MEMBASE:
1954                 if (ins->dreg != X86_EAX)
1955                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1956                 break;
1957         case OP_VCALL:
1958         case OP_VCALL_REG:
1959         case OP_VCALL_MEMBASE:
1960                 cinfo = get_call_info (cfg, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
1961                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1962                         /* Pop the destination address from the stack */
1963                         x86_pop_reg (code, X86_ECX);
1964                         
1965                         for (quad = 0; quad < 2; quad ++) {
1966                                 switch (cinfo->ret.pair_storage [quad]) {
1967                                 case ArgInIReg:
1968                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1969                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1970                                         break;
1971                                 case ArgNone:
1972                                         break;
1973                                 default:
1974                                         g_assert_not_reached ();
1975                                 }
1976                         }
1977                 }
1978         default:
1979                 break;
1980         }
1981
1982         return code;
1983 }
1984
1985 /*
1986  * emit_tls_get:
1987  * @code: buffer to store code to
1988  * @dreg: hard register where to place the result
1989  * @tls_offset: offset info
1990  *
1991  * emit_tls_get emits in @code the native code that puts in the dreg register
1992  * the item in the thread local storage identified by tls_offset.
1993  *
1994  * Returns: a pointer to the end of the stored code
1995  */
1996 static guint8*
1997 emit_tls_get (guint8* code, int dreg, int tls_offset)
1998 {
1999 #ifdef PLATFORM_WIN32
2000         /* 
2001          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
2002          * Journal and/or a disassembly of the TlsGet () function.
2003          */
2004         g_assert (tls_offset < 64);
2005         x86_prefix (code, X86_FS_PREFIX);
2006         x86_mov_reg_mem (code, dreg, 0x18, 4);
2007         /* Dunno what this does but TlsGetValue () contains it */
2008         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
2009         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
2010 #else
2011         if (optimize_for_xen) {
2012                 x86_prefix (code, X86_GS_PREFIX);
2013                 x86_mov_reg_mem (code, dreg, 0, 4);
2014                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
2015         } else {
2016                 x86_prefix (code, X86_GS_PREFIX);
2017                 x86_mov_reg_mem (code, dreg, tls_offset, 4);
2018         }
2019 #endif
2020         return code;
2021 }
2022
2023 /*
2024  * emit_load_volatile_arguments:
2025  *
2026  *  Load volatile arguments from the stack to the original input registers.
2027  * Required before a tail call.
2028  */
2029 static guint8*
2030 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2031 {
2032         MonoMethod *method = cfg->method;
2033         MonoMethodSignature *sig;
2034         MonoInst *inst;
2035         CallInfo *cinfo;
2036         guint32 i;
2037
2038         /* FIXME: Generate intermediate code instead */
2039
2040         sig = mono_method_signature (method);
2041
2042         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
2043         
2044         /* This is the opposite of the code in emit_prolog */
2045
2046         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2047                 ArgInfo *ainfo = cinfo->args + i;
2048                 MonoType *arg_type;
2049                 inst = cfg->args [i];
2050
2051                 if (sig->hasthis && (i == 0))
2052                         arg_type = &mono_defaults.object_class->byval_arg;
2053                 else
2054                         arg_type = sig->params [i - sig->hasthis];
2055
2056                 /*
2057                  * On x86, the arguments are either in their original stack locations, or in
2058                  * global regs.
2059                  */
2060                 if (inst->opcode == OP_REGVAR) {
2061                         g_assert (ainfo->storage == ArgOnStack);
2062                         
2063                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
2064                 }
2065         }
2066
2067         return code;
2068 }
2069
/*
 * REAL_PRINT_REG:
 * @text: string literal, printed in front of the values
 * @reg: hard register to dump
 *
 *   Debugging aid. Emits code which calls printf to print @text followed by
 * the number of @reg ("%d") and its contents ("%p"). EAX, EDX and ECX are
 * saved around the call and the three arguments are removed from the stack
 * afterwards.
 *
 * The expansion is a sequence of statements which uses the variable `code'
 * of the enclosing function as the emission pointer.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert ((reg) >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, (reg)); \
x86_push_imm (code, (reg)); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
2084
/*
 * Alignment, in bytes, of the native code offset at which a loop starts.
 * FIXME: benchmark and set based on cpu.
 */
#define LOOP_ALIGNMENT 8
/* True when BB has both a loop nesting level and a loop body start set */
#define bb_is_loop_start(bb) ((bb)->nesting && (bb)->loop_body_start)
2088
2089 void
2090 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2091 {
2092         MonoInst *ins;
2093         MonoCallInst *call;
2094         guint offset;
2095         guint8 *code = cfg->native_code + cfg->code_len;
2096         int max_len, cpos;
2097
2098         if (cfg->opt & MONO_OPT_PEEPHOLE)
2099                 peephole_pass (cfg, bb);
2100
2101         if (cfg->opt & MONO_OPT_LOOP) {
2102                 int pad, align = LOOP_ALIGNMENT;
2103                 /* set alignment depending on cpu */
2104                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2105                         pad = align - pad;
2106                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2107                         x86_padding (code, pad);
2108                         cfg->code_len += pad;
2109                         bb->native_offset = cfg->code_len;
2110                 }
2111         }
2112
2113         if (cfg->verbose_level > 2)
2114                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2115
2116         cpos = bb->max_offset;
2117
2118         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2119                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2120                 g_assert (!cfg->compile_aot);
2121                 cpos += 6;
2122
2123                 cov->data [bb->dfn].cil_code = bb->cil_code;
2124                 /* this is not thread save, but good enough */
2125                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2126         }
2127
2128         offset = code - cfg->native_code;
2129
2130         mono_debug_open_block (cfg, bb, offset);
2131
2132         MONO_INST_LIST_FOR_EACH_ENTRY (ins, &bb->ins_list, node) {
2133                 offset = code - cfg->native_code;
2134
2135                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2136
2137                 if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
2138                         cfg->code_size *= 2;
2139                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2140                         code = cfg->native_code + offset;
2141                         mono_jit_stats.code_reallocs++;
2142                 }
2143
2144                 if (cfg->debug_info)
2145                         mono_debug_record_line_number (cfg, ins, offset);
2146
2147                 switch (ins->opcode) {
2148                 case OP_BIGMUL:
2149                         x86_mul_reg (code, ins->sreg2, TRUE);
2150                         break;
2151                 case OP_BIGMUL_UN:
2152                         x86_mul_reg (code, ins->sreg2, FALSE);
2153                         break;
2154                 case OP_X86_SETEQ_MEMBASE:
2155                 case OP_X86_SETNE_MEMBASE:
2156                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2157                                          ins->inst_basereg, ins->inst_offset, TRUE);
2158                         break;
2159                 case OP_STOREI1_MEMBASE_IMM:
2160                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2161                         break;
2162                 case OP_STOREI2_MEMBASE_IMM:
2163                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2164                         break;
2165                 case OP_STORE_MEMBASE_IMM:
2166                 case OP_STOREI4_MEMBASE_IMM:
2167                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2168                         break;
2169                 case OP_STOREI1_MEMBASE_REG:
2170                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2171                         break;
2172                 case OP_STOREI2_MEMBASE_REG:
2173                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2174                         break;
2175                 case OP_STORE_MEMBASE_REG:
2176                 case OP_STOREI4_MEMBASE_REG:
2177                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2178                         break;
2179                 case CEE_LDIND_I:
2180                 case CEE_LDIND_I4:
2181                 case CEE_LDIND_U4:
2182                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2183                         break;
2184                 case OP_LOADU4_MEM:
2185                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2186                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2187                         break;
2188                 case OP_LOAD_MEMBASE:
2189                 case OP_LOADI4_MEMBASE:
2190                 case OP_LOADU4_MEMBASE:
2191                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2192                         break;
2193                 case OP_LOADU1_MEMBASE:
2194                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2195                         break;
2196                 case OP_LOADI1_MEMBASE:
2197                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2198                         break;
2199                 case OP_LOADU2_MEMBASE:
2200                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2201                         break;
2202                 case OP_LOADI2_MEMBASE:
2203                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2204                         break;
2205                 case CEE_CONV_I1:
2206                 case OP_SEXT_I1:
2207                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2208                         break;
2209                 case CEE_CONV_I2:
2210                 case OP_SEXT_I2:
2211                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2212                         break;
2213                 case CEE_CONV_U1:
2214                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2215                         break;
2216                 case CEE_CONV_U2:
2217                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2218                         break;
2219                 case OP_COMPARE:
2220                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2221                         break;
2222                 case OP_COMPARE_IMM:
2223                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2224                         break;
2225                 case OP_X86_COMPARE_MEMBASE_REG:
2226                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2227                         break;
2228                 case OP_X86_COMPARE_MEMBASE_IMM:
2229                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2230                         break;
2231                 case OP_X86_COMPARE_MEMBASE8_IMM:
2232                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2233                         break;
2234                 case OP_X86_COMPARE_REG_MEMBASE:
2235                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2236                         break;
2237                 case OP_X86_COMPARE_MEM_IMM:
2238                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2239                         break;
2240                 case OP_X86_TEST_NULL:
2241                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2242                         break;
2243                 case OP_X86_ADD_MEMBASE_IMM:
2244                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2245                         break;
2246                 case OP_X86_ADD_MEMBASE:
2247                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2248                         break;
2249                 case OP_X86_SUB_MEMBASE_IMM:
2250                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2251                         break;
2252                 case OP_X86_SUB_MEMBASE:
2253                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2254                         break;
2255                 case OP_X86_AND_MEMBASE_IMM:
2256                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2257                         break;
2258                 case OP_X86_OR_MEMBASE_IMM:
2259                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2260                         break;
2261                 case OP_X86_XOR_MEMBASE_IMM:
2262                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2263                         break;
2264                 case OP_X86_INC_MEMBASE:
2265                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2266                         break;
2267                 case OP_X86_INC_REG:
2268                         x86_inc_reg (code, ins->dreg);
2269                         break;
2270                 case OP_X86_DEC_MEMBASE:
2271                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2272                         break;
2273                 case OP_X86_DEC_REG:
2274                         x86_dec_reg (code, ins->dreg);
2275                         break;
2276                 case OP_X86_MUL_MEMBASE:
2277                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2278                         break;
2279                 case OP_BREAK:
2280                         x86_breakpoint (code);
2281                         break;
2282                 case OP_ADDCC:
2283                 case CEE_ADD:
2284                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2285                         break;
2286                 case OP_ADC:
2287                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2288                         break;
2289                 case OP_ADDCC_IMM:
2290                 case OP_ADD_IMM:
2291                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2292                         break;
2293                 case OP_ADC_IMM:
2294                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2295                         break;
2296                 case OP_SUBCC:
2297                 case CEE_SUB:
2298                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2299                         break;
2300                 case OP_SBB:
2301                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2302                         break;
2303                 case OP_SUBCC_IMM:
2304                 case OP_SUB_IMM:
2305                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2306                         break;
2307                 case OP_SBB_IMM:
2308                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2309                         break;
2310                 case CEE_AND:
2311                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2312                         break;
2313                 case OP_AND_IMM:
2314                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2315                         break;
2316                 case CEE_DIV:
2317                 case CEE_REM:
2318                         /* 
2319                          * The code is the same for div/rem, the allocator will allocate dreg
2320                          * to EAX/EDX as appropriate.
2321                          */
2322                         if (ins->sreg2 == X86_EDX) {
2323                                 /* cdq clobbers this */
2324                                 x86_push_reg (code, ins->sreg2);
2325                                 x86_cdq (code);
2326                                 x86_div_membase (code, X86_ESP, 0, TRUE);
2327                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);                            
2328                         } else {
2329                                 x86_cdq (code);
2330                                 x86_div_reg (code, ins->sreg2, TRUE);
2331                         }
2332                         break;
2333                 case CEE_DIV_UN:
2334                 case CEE_REM_UN:
2335                         if (ins->sreg2 == X86_EDX) {
2336                                 x86_push_reg (code, ins->sreg2);
2337                                 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2338                                 x86_div_membase (code, X86_ESP, 0, FALSE);
2339                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);                            
2340                         } else {
2341                                 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2342                                 x86_div_reg (code, ins->sreg2, FALSE);
2343                         }
2344                         break;
2345                 case OP_DIV_IMM:
2346                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2347                         x86_cdq (code);
2348                         x86_div_reg (code, ins->sreg2, TRUE);
2349                         break;
2350                 case OP_REM_IMM:
2351                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2352                         x86_cdq (code);
2353                         x86_div_reg (code, ins->sreg2, TRUE);
2354                         break;
2355                 case CEE_OR:
2356                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2357                         break;
2358                 case OP_OR_IMM:
2359                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2360                         break;
2361                 case CEE_XOR:
2362                 case OP_IXOR:
2363                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2364                         break;
2365                 case OP_XOR_IMM:
2366                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2367                         break;
2368                 case CEE_SHL:
2369                         g_assert (ins->sreg2 == X86_ECX);
2370                         x86_shift_reg (code, X86_SHL, ins->dreg);
2371                         break;
2372                 case CEE_SHR:
2373                         g_assert (ins->sreg2 == X86_ECX);
2374                         x86_shift_reg (code, X86_SAR, ins->dreg);
2375                         break;
2376                 case OP_SHR_IMM:
2377                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2378                         break;
2379                 case OP_SHR_UN_IMM:
2380                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2381                         break;
2382                 case CEE_SHR_UN:
2383                         g_assert (ins->sreg2 == X86_ECX);
2384                         x86_shift_reg (code, X86_SHR, ins->dreg);
2385                         break;
2386                 case OP_SHL_IMM:
2387                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2388                         break;
2389                 case OP_LSHL: {
2390                         guint8 *jump_to_end;
2391
2392                         /* handle shifts below 32 bits */
2393                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2394                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2395
2396                         x86_test_reg_imm (code, X86_ECX, 32);
2397                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2398
2399                         /* handle shifts over 31 bits */
2400                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2401                         x86_clear_reg (code, ins->sreg1);
2402                         
2403                         x86_patch (jump_to_end, code);
2404                         }
2405                         break;
2406                 case OP_LSHR: {
2407                         guint8 *jump_to_end;
2408
2409                         /* handle shifts below 32 bits */
2410                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2411                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2412
2413                         x86_test_reg_imm (code, X86_ECX, 32);
2414                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2415
2416                         /* handle shifts over 31 bits */
2417                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2418                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2419                         
2420                         x86_patch (jump_to_end, code);
2421                         }
2422                         break;
2423                 case OP_LSHR_UN: {
2424                         guint8 *jump_to_end;
2425
2426                         /* handle shifts below 32 bits */
2427                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2428                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2429
2430                         x86_test_reg_imm (code, X86_ECX, 32);
2431                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2432
2433                         /* handle shifts over 31 bits */
2434                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2435                         x86_clear_reg (code, ins->backend.reg3);
2436                         
2437                         x86_patch (jump_to_end, code);
2438                         }
2439                         break;
2440                 case OP_LSHL_IMM:
2441                         if (ins->inst_imm >= 32) {
2442                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2443                                 x86_clear_reg (code, ins->sreg1);
2444                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2445                         } else {
2446                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2447                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2448                         }
2449                         break;
2450                 case OP_LSHR_IMM:
2451                         if (ins->inst_imm >= 32) {
2452                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2453                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2454                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2455                         } else {
2456                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2457                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2458                         }
2459                         break;
2460                 case OP_LSHR_UN_IMM:
2461                         if (ins->inst_imm >= 32) {
2462                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2463                                 x86_clear_reg (code, ins->backend.reg3);
2464                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2465                         } else {
2466                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2467                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2468                         }
2469                         break;
2470                 case CEE_NOT:
2471                         x86_not_reg (code, ins->sreg1);
2472                         break;
2473                 case CEE_NEG:
2474                         x86_neg_reg (code, ins->sreg1);
2475                         break;
2476
2477                 case CEE_MUL:
2478                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2479                         break;
2480                 case OP_MUL_IMM:
2481                         switch (ins->inst_imm) {
2482                         case 2:
2483                                 /* MOV r1, r2 */
2484                                 /* ADD r1, r1 */
2485                                 if (ins->dreg != ins->sreg1)
2486                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2487                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2488                                 break;
2489                         case 3:
2490                                 /* LEA r1, [r2 + r2*2] */
2491                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2492                                 break;
2493                         case 5:
2494                                 /* LEA r1, [r2 + r2*4] */
2495                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2496                                 break;
2497                         case 6:
2498                                 /* LEA r1, [r2 + r2*2] */
2499                                 /* ADD r1, r1          */
2500                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2501                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2502                                 break;
2503                         case 9:
2504                                 /* LEA r1, [r2 + r2*8] */
2505                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2506                                 break;
2507                         case 10:
2508                                 /* LEA r1, [r2 + r2*4] */
2509                                 /* ADD r1, r1          */
2510                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2511                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2512                                 break;
2513                         case 12:
2514                                 /* LEA r1, [r2 + r2*2] */
2515                                 /* SHL r1, 2           */
2516                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2517                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2518                                 break;
2519                         case 25:
2520                                 /* LEA r1, [r2 + r2*4] */
2521                                 /* LEA r1, [r1 + r1*4] */
2522                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2523                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2524                                 break;
2525                         case 100:
2526                                 /* LEA r1, [r2 + r2*4] */
2527                                 /* SHL r1, 2           */
2528                                 /* LEA r1, [r1 + r1*4] */
2529                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2530                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2531                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2532                                 break;
2533                         default:
2534                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2535                                 break;
2536                         }
2537                         break;
2538                 case CEE_MUL_OVF:
2539                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2540                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2541                         break;
2542                 case CEE_MUL_OVF_UN: {
2543                         /* the mul operation and the exception check should most likely be split */
2544                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2545                         /*g_assert (ins->sreg2 == X86_EAX);
2546                         g_assert (ins->dreg == X86_EAX);*/
2547                         if (ins->sreg2 == X86_EAX) {
2548                                 non_eax_reg = ins->sreg1;
2549                         } else if (ins->sreg1 == X86_EAX) {
2550                                 non_eax_reg = ins->sreg2;
2551                         } else {
2552                                 /* no need to save since we're going to store to it anyway */
2553                                 if (ins->dreg != X86_EAX) {
2554                                         saved_eax = TRUE;
2555                                         x86_push_reg (code, X86_EAX);
2556                                 }
2557                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2558                                 non_eax_reg = ins->sreg2;
2559                         }
2560                         if (ins->dreg == X86_EDX) {
2561                                 if (!saved_eax) {
2562                                         saved_eax = TRUE;
2563                                         x86_push_reg (code, X86_EAX);
2564                                 }
2565                         } else if (ins->dreg != X86_EAX) {
2566                                 saved_edx = TRUE;
2567                                 x86_push_reg (code, X86_EDX);
2568                         }
2569                         x86_mul_reg (code, non_eax_reg, FALSE);
2570                         /* save before the check since pop and mov don't change the flags */
2571                         if (ins->dreg != X86_EAX)
2572                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2573                         if (saved_edx)
2574                                 x86_pop_reg (code, X86_EDX);
2575                         if (saved_eax)
2576                                 x86_pop_reg (code, X86_EAX);
2577                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2578                         break;
2579                 }
2580                 case OP_ICONST:
2581                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2582                         break;
2583                 case OP_AOTCONST:
2584                         g_assert_not_reached ();
2585                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2586                         x86_mov_reg_imm (code, ins->dreg, 0);
2587                         break;
2588                 case OP_LOAD_GOTADDR:
2589                         x86_call_imm (code, 0);
2590                         /* 
2591                          * The patch needs to point to the pop, since the GOT offset needs 
2592                          * to be added to that address.
2593                          */
2594                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2595                         x86_pop_reg (code, ins->dreg);
2596                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2597                         break;
2598                 case OP_GOT_ENTRY:
2599                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2600                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2601                         break;
2602                 case OP_X86_PUSH_GOT_ENTRY:
2603                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2604                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2605                         break;
2606                 case CEE_CONV_I4:
2607                 case OP_MOVE:
2608                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2609                         break;
2610                 case CEE_CONV_U4:
2611                         g_assert_not_reached ();
2612                 case OP_JMP: {
2613                         /*
2614                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2615                          * Keep in sync with the code in emit_epilog.
2616                          */
2617                         int pos = 0;
2618
2619                         /* FIXME: no tracing support... */
2620                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2621                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2622                         /* reset offset to make max_len work */
2623                         offset = code - cfg->native_code;
2624
2625                         g_assert (!cfg->method->save_lmf);
2626
2627                         code = emit_load_volatile_arguments (cfg, code);
2628
2629                         if (cfg->used_int_regs & (1 << X86_EBX))
2630                                 pos -= 4;
2631                         if (cfg->used_int_regs & (1 << X86_EDI))
2632                                 pos -= 4;
2633                         if (cfg->used_int_regs & (1 << X86_ESI))
2634                                 pos -= 4;
2635                         if (pos)
2636                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2637         
2638                         if (cfg->used_int_regs & (1 << X86_ESI))
2639                                 x86_pop_reg (code, X86_ESI);
2640                         if (cfg->used_int_regs & (1 << X86_EDI))
2641                                 x86_pop_reg (code, X86_EDI);
2642                         if (cfg->used_int_regs & (1 << X86_EBX))
2643                                 x86_pop_reg (code, X86_EBX);
2644         
2645                         /* restore ESP/EBP */
2646                         x86_leave (code);
2647                         offset = code - cfg->native_code;
2648                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2649                         x86_jump32 (code, 0);
2650                         break;
2651                 }
2652                 case OP_CHECK_THIS:
2653                         /* ensure ins->sreg1 is not NULL
2654                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2655                          * cmp DWORD PTR [eax], 0
2656                          */
2657                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2658                         break;
2659                 case OP_ARGLIST: {
2660                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2661                         x86_push_reg (code, hreg);
2662                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2663                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2664                         x86_pop_reg (code, hreg);
2665                         break;
2666                 }
2667                 case OP_FCALL:
2668                 case OP_LCALL:
2669                 case OP_VCALL:
2670                 case OP_VOIDCALL:
2671                 case CEE_CALL:
2672                         call = (MonoCallInst*)ins;
2673                         if (ins->flags & MONO_INST_HAS_METHOD)
2674                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2675                         else
2676                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2677                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2678                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2679                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2680                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2681                                  * smart enough to do that optimization yet).
2682                                  *
2683                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2684                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2685                                  * speedup (most likely from locality benefits). People with other processors should
2686                                  * check on theirs to see what happens.
2687                                  */
2688                                 if (call->stack_usage == 4) {
2689                                         /* we want to use registers that won't get used soon, so use
2690                                          * ecx, as eax will get allocated first. edx is used by long calls,
2691                                          * so we can't use that.
2692                                          */
2693                                         
2694                                         x86_pop_reg (code, X86_ECX);
2695                                 } else {
2696                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2697                                 }
2698                         }
2699                         code = emit_move_return_value (cfg, ins, code);
2700                         break;
2701                 case OP_FCALL_REG:
2702                 case OP_LCALL_REG:
2703                 case OP_VCALL_REG:
2704                 case OP_VOIDCALL_REG:
2705                 case OP_CALL_REG:
2706                         call = (MonoCallInst*)ins;
2707                         x86_call_reg (code, ins->sreg1);
2708                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2709                                 if (call->stack_usage == 4)
2710                                         x86_pop_reg (code, X86_ECX);
2711                                 else
2712                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2713                         }
2714                         code = emit_move_return_value (cfg, ins, code);
2715                         break;
2716                 case OP_FCALL_MEMBASE:
2717                 case OP_LCALL_MEMBASE:
2718                 case OP_VCALL_MEMBASE:
2719                 case OP_VOIDCALL_MEMBASE:
2720                 case OP_CALL_MEMBASE:
2721                         call = (MonoCallInst*)ins;
2722                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2723                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2724                                 if (call->stack_usage == 4)
2725                                         x86_pop_reg (code, X86_ECX);
2726                                 else
2727                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2728                         }
2729                         code = emit_move_return_value (cfg, ins, code);
2730                         break;
2731                 case OP_OUTARG:
2732                 case OP_X86_PUSH:
2733                         x86_push_reg (code, ins->sreg1);
2734                         break;
2735                 case OP_X86_PUSH_IMM:
2736                         x86_push_imm (code, ins->inst_imm);
2737                         break;
2738                 case OP_X86_PUSH_MEMBASE:
2739                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2740                         break;
2741                 case OP_X86_PUSH_OBJ: 
2742                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2743                         x86_push_reg (code, X86_EDI);
2744                         x86_push_reg (code, X86_ESI);
2745                         x86_push_reg (code, X86_ECX);
2746                         if (ins->inst_offset)
2747                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2748                         else
2749                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2750                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2751                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2752                         x86_cld (code);
2753                         x86_prefix (code, X86_REP_PREFIX);
2754                         x86_movsd (code);
2755                         x86_pop_reg (code, X86_ECX);
2756                         x86_pop_reg (code, X86_ESI);
2757                         x86_pop_reg (code, X86_EDI);
2758                         break;
2759                 case OP_X86_LEA:
2760                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2761                         break;
2762                 case OP_X86_LEA_MEMBASE:
2763                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2764                         break;
2765                 case OP_X86_XCHG:
2766                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2767                         break;
2768                 case OP_LOCALLOC:
2769                         /* keep alignment */
2770                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2771                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2772                         code = mono_emit_stack_alloc (code, ins);
2773                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2774                         break;
2775                 case CEE_RET:
2776                         x86_ret (code);
2777                         break;
2778                 case OP_THROW: {
2779                         x86_push_reg (code, ins->sreg1);
2780                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2781                                                           (gpointer)"mono_arch_throw_exception");
2782                         break;
2783                 }
2784                 case OP_RETHROW: {
2785                         x86_push_reg (code, ins->sreg1);
2786                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2787                                                           (gpointer)"mono_arch_rethrow_exception");
2788                         break;
2789                 }
2790                 case OP_CALL_HANDLER: 
2791                         /* Align stack */
2792 #ifdef __APPLE__
2793                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2794 #endif
2795                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2796                         x86_call_imm (code, 0);
2797 #ifdef __APPLE__
2798                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2799 #endif
2800                         break;
2801                 case OP_START_HANDLER: {
2802                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2803                         x86_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, X86_ESP, 4);
2804                         break;
2805                 }
2806                 case OP_ENDFINALLY: {
2807                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2808                         x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
2809                         x86_ret (code);
2810                         break;
2811                 }
2812                 case OP_ENDFILTER: {
2813                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2814                         x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
2815                         /* The local allocator will put the result into EAX */
2816                         x86_ret (code);
2817                         break;
2818                 }
2819
2820                 case OP_LABEL:
2821                         ins->inst_c0 = code - cfg->native_code;
2822                         break;
2823                 case OP_BR:
2824                         if (ins->flags & MONO_INST_BRLABEL) {
2825                                 if (ins->inst_i0->inst_c0) {
2826                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2827                                 } else {
2828                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2829                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2830                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2831                                                 x86_jump8 (code, 0);
2832                                         else 
2833                                                 x86_jump32 (code, 0);
2834                                 }
2835                         } else {
2836                                 if (ins->inst_target_bb->native_offset) {
2837                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2838                                 } else {
2839                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2840                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2841                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2842                                                 x86_jump8 (code, 0);
2843                                         else 
2844                                                 x86_jump32 (code, 0);
2845                                 } 
2846                         }
2847                         break;
2848                 case OP_BR_REG:
2849                         x86_jump_reg (code, ins->sreg1);
2850                         break;
2851                 case OP_CEQ:
2852                 case OP_CLT:
2853                 case OP_CLT_UN:
2854                 case OP_CGT:
2855                 case OP_CGT_UN:
2856                 case OP_CNE:
2857                         x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2858                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2859                         break;
2860                 case OP_COND_EXC_EQ:
2861                 case OP_COND_EXC_NE_UN:
2862                 case OP_COND_EXC_LT:
2863                 case OP_COND_EXC_LT_UN:
2864                 case OP_COND_EXC_GT:
2865                 case OP_COND_EXC_GT_UN:
2866                 case OP_COND_EXC_GE:
2867                 case OP_COND_EXC_GE_UN:
2868                 case OP_COND_EXC_LE:
2869                 case OP_COND_EXC_LE_UN:
2870                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
2871                         break;
2872                 case OP_COND_EXC_OV:
2873                 case OP_COND_EXC_NO:
2874                 case OP_COND_EXC_C:
2875                 case OP_COND_EXC_NC:
2876                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2877                         break;
2878                 case CEE_BEQ:
2879                 case CEE_BNE_UN:
2880                 case CEE_BLT:
2881                 case CEE_BLT_UN:
2882                 case CEE_BGT:
2883                 case CEE_BGT_UN:
2884                 case CEE_BGE:
2885                 case CEE_BGE_UN:
2886                 case CEE_BLE:
2887                 case CEE_BLE_UN:
2888                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2889                         break;
2890
2891                 /* floating point opcodes */
2892                 case OP_R8CONST: {
2893                         double d = *(double *)ins->inst_p0;
2894
2895                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2896                                 x86_fldz (code);
2897                         } else if (d == 1.0) {
2898                                 x86_fld1 (code);
2899                         } else {
2900                                 if (cfg->compile_aot) {
2901                                         guint32 *val = (guint32*)&d;
2902                                         x86_push_imm (code, val [1]);
2903                                         x86_push_imm (code, val [0]);
2904                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2905                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2906                                 }
2907                                 else {
2908                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2909                                         x86_fld (code, NULL, TRUE);
2910                                 }
2911                         }
2912                         break;
2913                 }
2914                 case OP_R4CONST: {
2915                         float f = *(float *)ins->inst_p0;
2916
2917                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2918                                 x86_fldz (code);
2919                         } else if (f == 1.0) {
2920                                 x86_fld1 (code);
2921                         } else {
2922                                 if (cfg->compile_aot) {
2923                                         guint32 val = *(guint32*)&f;
2924                                         x86_push_imm (code, val);
2925                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2926                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2927                                 }
2928                                 else {
2929                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2930                                         x86_fld (code, NULL, FALSE);
2931                                 }
2932                         }
2933                         break;
2934                 }
2935                 case OP_STORER8_MEMBASE_REG:
2936                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2937                         break;
2938                 case OP_LOADR8_SPILL_MEMBASE:
2939                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2940                         x86_fxch (code, 1);
2941                         break;
2942                 case OP_LOADR8_MEMBASE:
2943                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2944                         break;
2945                 case OP_STORER4_MEMBASE_REG:
2946                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2947                         break;
2948                 case OP_LOADR4_MEMBASE:
2949                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2950                         break;
2951                 case CEE_CONV_R4: /* FIXME: change precision */
2952                 case CEE_CONV_R8:
2953                         x86_push_reg (code, ins->sreg1);
2954                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2955                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2956                         break;
2957                 case OP_X86_FP_LOAD_I8:
2958                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2959                         break;
2960                 case OP_X86_FP_LOAD_I4:
2961                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2962                         break;
2963                 case OP_FCONV_TO_I1:
2964                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2965                         break;
2966                 case OP_FCONV_TO_U1:
2967                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2968                         break;
2969                 case OP_FCONV_TO_I2:
2970                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2971                         break;
2972                 case OP_FCONV_TO_U2:
2973                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2974                         break;
2975                 case OP_FCONV_TO_I4:
2976                 case OP_FCONV_TO_I:
2977                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2978                         break;
2979                 case OP_FCONV_TO_I8:
2980                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2981                         x86_fnstcw_membase(code, X86_ESP, 0);
2982                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2983                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2984                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2985                         x86_fldcw_membase (code, X86_ESP, 2);
2986                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2987                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2988                         x86_pop_reg (code, ins->dreg);
2989                         x86_pop_reg (code, ins->backend.reg3);
2990                         x86_fldcw_membase (code, X86_ESP, 0);
2991                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2992                         break;
2993                 case OP_LCONV_TO_R_UN: { 
2994                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2995                         guint8 *br;
2996
2997                         /* load 64bit integer to FP stack */
2998                         x86_push_imm (code, 0);
2999                         x86_push_reg (code, ins->sreg2);
3000                         x86_push_reg (code, ins->sreg1);
3001                         x86_fild_membase (code, X86_ESP, 0, TRUE);
3002                         /* store as 80bit FP value */
3003                         x86_fst80_membase (code, X86_ESP, 0);
3004                         
3005                         /* test if lreg is negative */
3006                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3007                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3008         
3009                         /* add correction constant mn */
3010                         x86_fld80_mem (code, mn);
3011                         x86_fld80_membase (code, X86_ESP, 0);
3012                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3013                         x86_fst80_membase (code, X86_ESP, 0);
3014
3015                         x86_patch (br, code);
3016
3017                         x86_fld80_membase (code, X86_ESP, 0);
3018                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
3019
3020                         break;
3021                 }
3022                 case OP_LCONV_TO_OVF_I: {
3023                         guint8 *br [3], *label [1];
3024                         MonoInst *tins;
3025
3026                         /* 
3027                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
3028                          */
3029                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3030
3031                         /* If the low word top bit is set, see if we are negative */
3032                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3033                         /* We are not negative (no top bit set), check for our top word to be zero */
3034                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3035                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3036                         label [0] = code;
3037
3038                         /* throw exception */
3039                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
3040                         if (tins) {
3041                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
3042                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
3043                                         x86_jump8 (code, 0);
3044                                 else
3045                                         x86_jump32 (code, 0);
3046                         } else {
3047                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3048                                 x86_jump32 (code, 0);
3049                         }
3050         
3051         
3052                         x86_patch (br [0], code);
3053                         /* our top bit is set, check that top word is 0xffffffff */
3054                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3055                 
3056                         x86_patch (br [1], code);
3057                         /* nope, emit exception */
3058                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3059                         x86_patch (br [2], label [0]);
3060
3061                         if (ins->dreg != ins->sreg1)
3062                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3063                         break;
3064                 }
3065                 case OP_FADD:
3066                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3067                         break;
3068                 case OP_FSUB:
3069                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3070                         break;          
3071                 case OP_FMUL:
3072                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3073                         break;          
3074                 case OP_FDIV:
3075                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3076                         break;          
3077                 case OP_FNEG:
3078                         x86_fchs (code);
3079                         break;          
3080                 case OP_SIN:
3081                         x86_fsin (code);
3082                         x86_fldz (code);
3083                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3084                         break;          
3085                 case OP_COS:
3086                         x86_fcos (code);
3087                         x86_fldz (code);
3088                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3089                         break;          
3090                 case OP_ABS:
3091                         x86_fabs (code);
3092                         break;          
3093                 case OP_TAN: {
3094                         /* 
3095                          * it really doesn't make sense to inline all this code,
3096                          * it's here just to show that things may not be as simple 
3097                          * as they appear.
3098                          */
3099                         guchar *check_pos, *end_tan, *pop_jump;
3100                         x86_push_reg (code, X86_EAX);
3101                         x86_fptan (code);
3102                         x86_fnstsw (code);
3103                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3104                         check_pos = code;
3105                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3106                         x86_fstp (code, 0); /* pop the 1.0 */
3107                         end_tan = code;
3108                         x86_jump8 (code, 0);
3109                         x86_fldpi (code);
3110                         x86_fp_op (code, X86_FADD, 0);
3111                         x86_fxch (code, 1);
3112                         x86_fprem1 (code);
3113                         x86_fstsw (code);
3114                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3115                         pop_jump = code;
3116                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3117                         x86_fstp (code, 1);
3118                         x86_fptan (code);
3119                         x86_patch (pop_jump, code);
3120                         x86_fstp (code, 0); /* pop the 1.0 */
3121                         x86_patch (check_pos, code);
3122                         x86_patch (end_tan, code);
3123                         x86_fldz (code);
3124                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3125                         x86_pop_reg (code, X86_EAX);
3126                         break;
3127                 }
3128                 case OP_ATAN:
3129                         x86_fld1 (code);
3130                         x86_fpatan (code);
3131                         x86_fldz (code);
3132                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3133                         break;          
3134                 case OP_SQRT:
3135                         x86_fsqrt (code);
3136                         break;          
3137                 case OP_X86_FPOP:
3138                         x86_fstp (code, 0);
3139                         break;          
3140                 case OP_FREM: {
3141                         guint8 *l1, *l2;
3142
3143                         x86_push_reg (code, X86_EAX);
3144                         /* we need to exchange ST(0) with ST(1) */
3145                         x86_fxch (code, 1);
3146
3147                         /* this requires a loop, because fprem sometimes 
3148                          * returns a partial remainder */
3149                         l1 = code;
3150                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3151                         /* x86_fprem1 (code); */
3152                         x86_fprem (code);
3153                         x86_fnstsw (code);
3154                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3155                         l2 = code + 2;
3156                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3157
3158                         /* pop result */
3159                         x86_fstp (code, 1);
3160
3161                         x86_pop_reg (code, X86_EAX);
3162                         break;
3163                 }
3164                 case OP_FCOMPARE:
3165                         if (cfg->opt & MONO_OPT_FCMOV) {
3166                                 x86_fcomip (code, 1);
3167                                 x86_fstp (code, 0);
3168                                 break;
3169                         }
3170                         /* this overwrites EAX */
3171                         EMIT_FPCOMPARE(code);
3172                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3173                         break;
3174                 case OP_FCEQ:
3175                         if (cfg->opt & MONO_OPT_FCMOV) {
3176                                 /* zeroing the register at the start results in 
3177                                  * shorter and faster code (we can also remove the widening op)
3178                                  */
3179                                 guchar *unordered_check;
3180                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3181                                 x86_fcomip (code, 1);
3182                                 x86_fstp (code, 0);
3183                                 unordered_check = code;
3184                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3185                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3186                                 x86_patch (unordered_check, code);
3187                                 break;
3188                         }
3189                         if (ins->dreg != X86_EAX) 
3190                                 x86_push_reg (code, X86_EAX);
3191
3192                         EMIT_FPCOMPARE(code);
3193                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3194                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3195                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3196                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3197
3198                         if (ins->dreg != X86_EAX) 
3199                                 x86_pop_reg (code, X86_EAX);
3200                         break;
3201                 case OP_FCLT:
3202                 case OP_FCLT_UN:
3203                         if (cfg->opt & MONO_OPT_FCMOV) {
3204                                 /* zeroing the register at the start results in 
3205                                  * shorter and faster code (we can also remove the widening op)
3206                                  */
3207                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3208                                 x86_fcomip (code, 1);
3209                                 x86_fstp (code, 0);
3210                                 if (ins->opcode == OP_FCLT_UN) {
3211                                         guchar *unordered_check = code;
3212                                         guchar *jump_to_end;
3213                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3214                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3215                                         jump_to_end = code;
3216                                         x86_jump8 (code, 0);
3217                                         x86_patch (unordered_check, code);
3218                                         x86_inc_reg (code, ins->dreg);
3219                                         x86_patch (jump_to_end, code);
3220                                 } else {
3221                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3222                                 }
3223                                 break;
3224                         }
3225                         if (ins->dreg != X86_EAX) 
3226                                 x86_push_reg (code, X86_EAX);
3227
3228                         EMIT_FPCOMPARE(code);
3229                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3230                         if (ins->opcode == OP_FCLT_UN) {
3231                                 guchar *is_not_zero_check, *end_jump;
3232                                 is_not_zero_check = code;
3233                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3234                                 end_jump = code;
3235                                 x86_jump8 (code, 0);
3236                                 x86_patch (is_not_zero_check, code);
3237                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3238
3239                                 x86_patch (end_jump, code);
3240                         }
3241                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3242                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3243
3244                         if (ins->dreg != X86_EAX) 
3245                                 x86_pop_reg (code, X86_EAX);
3246                         break;
3247                 case OP_FCGT:
3248                 case OP_FCGT_UN:
3249                         if (cfg->opt & MONO_OPT_FCMOV) {
3250                                 /* zeroing the register at the start results in 
3251                                  * shorter and faster code (we can also remove the widening op)
3252                                  */
3253                                 guchar *unordered_check;
3254                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3255                                 x86_fcomip (code, 1);
3256                                 x86_fstp (code, 0);
3257                                 if (ins->opcode == OP_FCGT) {
3258                                         unordered_check = code;
3259                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3260                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3261                                         x86_patch (unordered_check, code);
3262                                 } else {
3263                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3264                                 }
3265                                 break;
3266                         }
3267                         if (ins->dreg != X86_EAX) 
3268                                 x86_push_reg (code, X86_EAX);
3269
3270                         EMIT_FPCOMPARE(code);
3271                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3272                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3273                         if (ins->opcode == OP_FCGT_UN) {
3274                                 guchar *is_not_zero_check, *end_jump;
3275                                 is_not_zero_check = code;
3276                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3277                                 end_jump = code;
3278                                 x86_jump8 (code, 0);
3279                                 x86_patch (is_not_zero_check, code);
3280                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3281         
3282                                 x86_patch (end_jump, code);
3283                         }
3284                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3285                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3286
3287                         if (ins->dreg != X86_EAX) 
3288                                 x86_pop_reg (code, X86_EAX);
3289                         break;
3290                 case OP_FBEQ:
3291                         if (cfg->opt & MONO_OPT_FCMOV) {
3292                                 guchar *jump = code;
3293                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3294                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3295                                 x86_patch (jump, code);
3296                                 break;
3297                         }
3298                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3299                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3300                         break;
3301                 case OP_FBNE_UN:
3302                         /* Branch if C013 != 100 */
3303                         if (cfg->opt & MONO_OPT_FCMOV) {
3304                                 /* branch if !ZF or (PF|CF) */
3305                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3306                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3307                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3308                                 break;
3309                         }
3310                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3311                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3312                         break;
3313                 case OP_FBLT:
3314                         if (cfg->opt & MONO_OPT_FCMOV) {
3315                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3316                                 break;
3317                         }
3318                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3319                         break;
3320                 case OP_FBLT_UN:
3321                         if (cfg->opt & MONO_OPT_FCMOV) {
3322                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3323                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3324                                 break;
3325                         }
3326                         if (ins->opcode == OP_FBLT_UN) {
3327                                 guchar *is_not_zero_check, *end_jump;
3328                                 is_not_zero_check = code;
3329                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3330                                 end_jump = code;
3331                                 x86_jump8 (code, 0);
3332                                 x86_patch (is_not_zero_check, code);
3333                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3334
3335                                 x86_patch (end_jump, code);
3336                         }
3337                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3338                         break;
3339                 case OP_FBGT:
3340                 case OP_FBGT_UN:
3341                         if (cfg->opt & MONO_OPT_FCMOV) {
3342                                 if (ins->opcode == OP_FBGT) {
3343                                         guchar *br1;
3344
3345                                         /* skip branch if C1=1 */
3346                                         br1 = code;
3347                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3348                                         /* branch if (C0 | C3) = 1 */
3349                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3350                                         x86_patch (br1, code);
3351                                 } else {
3352                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3353                                 }
3354                                 break;
3355                         }
3356                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3357                         if (ins->opcode == OP_FBGT_UN) {
3358                                 guchar *is_not_zero_check, *end_jump;
3359                                 is_not_zero_check = code;
3360                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3361                                 end_jump = code;
3362                                 x86_jump8 (code, 0);
3363                                 x86_patch (is_not_zero_check, code);
3364                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3365
3366                                 x86_patch (end_jump, code);
3367                         }
3368                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3369                         break;
3370                 case OP_FBGE:
3371                         /* Branch if C013 == 100 or 001 */
3372                         if (cfg->opt & MONO_OPT_FCMOV) {
3373                                 guchar *br1;
3374
3375                                 /* skip branch if C1=1 */
3376                                 br1 = code;
3377                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3378                                 /* branch if (C0 | C3) = 1 */
3379                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3380                                 x86_patch (br1, code);
3381                                 break;
3382                         }
3383                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3384                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3385                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3386                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3387                         break;
3388                 case OP_FBGE_UN:
3389                         /* Branch if C013 == 000 */
3390                         if (cfg->opt & MONO_OPT_FCMOV) {
3391                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3392                                 break;
3393                         }
3394                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3395                         break;
3396                 case OP_FBLE:
3397                         /* Branch if C013=000 or 100 */
3398                         if (cfg->opt & MONO_OPT_FCMOV) {
3399                                 guchar *br1;
3400
3401                                 /* skip branch if C1=1 */
3402                                 br1 = code;
3403                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3404                                 /* branch if C0=0 */
3405                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3406                                 x86_patch (br1, code);
3407                                 break;
3408                         }
3409                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3410                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3411                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3412                         break;
3413                 case OP_FBLE_UN:
3414                         /* Branch if C013 != 001 */
3415                         if (cfg->opt & MONO_OPT_FCMOV) {
3416                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3417                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3418                                 break;
3419                         }
3420                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3421                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3422                         break;
3423                 case OP_CKFINITE: {
3424                         guchar *br1;
3425                         x86_push_reg (code, X86_EAX);
3426                         x86_fxam (code);
3427                         x86_fnstsw (code);
3428                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3429                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3430                         x86_pop_reg (code, X86_EAX);
3431
3432                         /* Have to clean up the fp stack before throwing the exception */
3433                         br1 = code;
3434                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3435
3436                         x86_fstp (code, 0);                     
3437                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3438
3439                         x86_patch (br1, code);
3440                         break;
3441                 }
3442                 case OP_TLS_GET: {
3443                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3444                         break;
3445                 }
3446                 case OP_MEMORY_BARRIER: {
3447                         /* Not needed on x86 */
3448                         break;
3449                 }
3450                 case OP_ATOMIC_ADD_I4: {
3451                         int dreg = ins->dreg;
3452
3453                         if (dreg == ins->inst_basereg) {
3454                                 x86_push_reg (code, ins->sreg2);
3455                                 dreg = ins->sreg2;
3456                         } 
3457                         
3458                         if (dreg != ins->sreg2)
3459                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3460
3461                         x86_prefix (code, X86_LOCK_PREFIX);
3462                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3463
3464                         if (dreg != ins->dreg) {
3465                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3466                                 x86_pop_reg (code, dreg);
3467                         }
3468
3469                         break;
3470                 }
3471                 case OP_ATOMIC_ADD_NEW_I4: {
3472                         int dreg = ins->dreg;
3473
3474                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3475                         if (ins->sreg2 == dreg) {
3476                                 if (dreg == X86_EBX) {
3477                                         dreg = X86_EDI;
3478                                         if (ins->inst_basereg == X86_EDI)
3479                                                 dreg = X86_ESI;
3480                                 } else {
3481                                         dreg = X86_EBX;
3482                                         if (ins->inst_basereg == X86_EBX)
3483                                                 dreg = X86_EDI;
3484                                 }
3485                         } else if (ins->inst_basereg == dreg) {
3486                                 if (dreg == X86_EBX) {
3487                                         dreg = X86_EDI;
3488                                         if (ins->sreg2 == X86_EDI)
3489                                                 dreg = X86_ESI;
3490                                 } else {
3491                                         dreg = X86_EBX;
3492                                         if (ins->sreg2 == X86_EBX)
3493                                                 dreg = X86_EDI;
3494                                 }
3495                         }
3496
3497                         if (dreg != ins->dreg) {
3498                                 x86_push_reg (code, dreg);
3499                         }
3500
3501                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3502                         x86_prefix (code, X86_LOCK_PREFIX);
3503                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3504                         /* dreg contains the old value, add with sreg2 value */
3505                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3506                         
3507                         if (ins->dreg != dreg) {
3508                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3509                                 x86_pop_reg (code, dreg);
3510                         }
3511
3512                         break;
3513                 }
3514                 case OP_ATOMIC_EXCHANGE_I4: {
3515                         guchar *br[2];
3516                         int sreg2 = ins->sreg2;
3517                         int breg = ins->inst_basereg;
3518
3519                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3520                          * hack to overcome limits in x86 reg allocator 
3521                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3522                          */
3523                         if (ins->dreg != X86_EAX)
3524                                 x86_push_reg (code, X86_EAX);
3525                         
3526                         /* We need the EAX reg for the cmpxchg */
3527                         if (ins->sreg2 == X86_EAX) {
3528                                 x86_push_reg (code, X86_EDX);
3529                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3530                                 sreg2 = X86_EDX;
3531                         }
3532
3533                         if (breg == X86_EAX) {
3534                                 x86_push_reg (code, X86_ESI);
3535                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3536                                 breg = X86_ESI;
3537                         }
3538
3539                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3540
3541                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3542                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3543                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3544                         x86_patch (br [1], br [0]);
3545
3546                         if (breg != ins->inst_basereg)
3547                                 x86_pop_reg (code, X86_ESI);
3548
3549                         if (ins->dreg != X86_EAX) {
3550                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3551                                 x86_pop_reg (code, X86_EAX);
3552                         }
3553
3554                         if (ins->sreg2 != sreg2)
3555                                 x86_pop_reg (code, X86_EDX);
3556
3557                         break;
3558                 }
3559                 default:
3560                         g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
3561                         g_assert_not_reached ();
3562                 }
3563
3564                 if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
3565                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3566                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3567                         g_assert_not_reached ();
3568                 }
3569                
3570                 cpos += max_len;
3571         }
3572
3573         cfg->code_len = code - cfg->native_code;
3574 }
3575
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Architecture hook for registering low level calls. This backend has
 * nothing to register, so the function body is intentionally empty.
 */
void
mono_arch_register_lowlevel_calls (void)
{
	/* Nothing to do here. */
}
3580
3581 void
3582 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3583 {
3584         MonoJumpInfo *patch_info;
3585         gboolean compile_aot = !run_cctors;
3586
3587         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3588                 unsigned char *ip = patch_info->ip.i + code;
3589                 const unsigned char *target;
3590
3591                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3592
3593                 if (compile_aot) {
3594                         switch (patch_info->type) {
3595                         case MONO_PATCH_INFO_BB:
3596                         case MONO_PATCH_INFO_LABEL:
3597                                 break;
3598                         default:
3599                                 /* No need to patch these */
3600                                 continue;
3601                         }
3602                 }
3603
3604                 switch (patch_info->type) {
3605                 case MONO_PATCH_INFO_IP:
3606                         *((gconstpointer *)(ip)) = target;
3607                         break;
3608                 case MONO_PATCH_INFO_CLASS_INIT: {
3609                         guint8 *code = ip;
3610                         /* Might already been changed to a nop */
3611                         x86_call_code (code, 0);
3612                         x86_patch (ip, target);
3613                         break;
3614                 }
3615                 case MONO_PATCH_INFO_ABS:
3616                 case MONO_PATCH_INFO_METHOD:
3617                 case MONO_PATCH_INFO_METHOD_JUMP:
3618                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3619                 case MONO_PATCH_INFO_BB:
3620                 case MONO_PATCH_INFO_LABEL:
3621                         x86_patch (ip, target);
3622                         break;
3623                 case MONO_PATCH_INFO_NONE:
3624                         break;
3625                 default: {
3626                         guint32 offset = mono_arch_get_patch_offset (ip);
3627                         *((gconstpointer *)(ip + offset)) = target;
3628                         break;
3629                 }
3630                 }
3631         }
3632 }
3633
3634 guint8 *
3635 mono_arch_emit_prolog (MonoCompile *cfg)
3636 {
3637         MonoMethod *method = cfg->method;
3638         MonoBasicBlock *bb;
3639         MonoMethodSignature *sig;
3640         MonoInst *inst;
3641         int alloc_size, pos, max_offset, i;
3642         guint8 *code;
3643
3644         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 1024);
3645
3646         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3647                 cfg->code_size += 512;
3648
3649         code = cfg->native_code = g_malloc (cfg->code_size);
3650
3651         x86_push_reg (code, X86_EBP);
3652         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3653
3654         alloc_size = cfg->stack_offset;
3655         pos = 0;
3656
3657         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3658                 /* Might need to attach the thread to the JIT  or change the domain for the callback */
3659                 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
3660                         guint8 *buf, *no_domain_branch;
3661
3662                         code = emit_tls_get (code, X86_EAX, appdomain_tls_offset);
3663                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
3664                         no_domain_branch = code;
3665                         x86_branch8 (code, X86_CC_NE, 0, 0);
3666                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3667                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3668                         buf = code;
3669                         x86_branch8 (code, X86_CC_NE, 0, 0);
3670                         x86_patch (no_domain_branch, code);
3671                         x86_push_imm (code, cfg->domain);
3672                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3673                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3674                         x86_patch (buf, code);
3675 #ifdef PLATFORM_WIN32
3676                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3677                         /* FIXME: Add a separate key for LMF to avoid this */
3678                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3679 #endif
3680                 } else {
3681                         g_assert (!cfg->compile_aot);
3682                         x86_push_imm (code, cfg->domain);
3683                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3684                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3685                 }
3686         }
3687
3688         if (method->save_lmf) {
3689                 pos += sizeof (MonoLMF);
3690
3691                 /* save the current IP */
3692                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3693                 x86_push_imm_template (code);
3694
3695                 /* save all caller saved regs */
3696                 x86_push_reg (code, X86_EBP);
3697                 x86_push_reg (code, X86_ESI);
3698                 x86_push_reg (code, X86_EDI);
3699                 x86_push_reg (code, X86_EBX);
3700
3701                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3702                         /*
3703                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3704                          * through the mono_lmf_addr TLS variable.
3705                          */
3706                         /* %eax = previous_lmf */
3707                         x86_prefix (code, X86_GS_PREFIX);
3708                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3709                         /* skip esp + method_info + lmf */
3710                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
3711                         /* push previous_lmf */
3712                         x86_push_reg (code, X86_EAX);
3713                         /* new lmf = ESP */
3714                         x86_prefix (code, X86_GS_PREFIX);
3715                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
3716                 } else {
3717                         /* get the address of lmf for the current thread */
3718                         /* 
3719                          * This is performance critical so we try to use some tricks to make
3720                          * it fast.
3721                          */                                                                        
3722
3723                         if (lmf_addr_tls_offset != -1) {
3724                                 /* Load lmf quicky using the GS register */
3725                                 code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
3726 #ifdef PLATFORM_WIN32
3727                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3728                                 /* FIXME: Add a separate key for LMF to avoid this */
3729                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3730 #endif
3731                         } else {
3732                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3733                         }
3734
3735                         /* Skip esp + method info */
3736                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3737
3738                         /* push lmf */
3739                         x86_push_reg (code, X86_EAX); 
3740                         /* push *lfm (previous_lmf) */
3741                         x86_push_membase (code, X86_EAX, 0);
3742                         /* *(lmf) = ESP */
3743                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3744                 }
3745         } else {
3746
3747                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3748                         x86_push_reg (code, X86_EBX);
3749                         pos += 4;
3750                 }
3751
3752                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3753                         x86_push_reg (code, X86_EDI);
3754                         pos += 4;
3755                 }
3756
3757                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3758                         x86_push_reg (code, X86_ESI);
3759                         pos += 4;
3760                 }
3761         }
3762
3763         alloc_size -= pos;
3764
3765 #if __APPLE__
3766         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3767         {
3768                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3769                 if (tot & 4) {
3770                         tot += 4;
3771                         alloc_size += 4;
3772                 }
3773                 if (tot & 8) {
3774                         alloc_size += 8;
3775                 }
3776         }
3777 #endif
3778
3779         if (alloc_size) {
3780                 /* See mono_emit_stack_alloc */
3781 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3782                 guint32 remaining_size = alloc_size;
3783                 while (remaining_size >= 0x1000) {
3784                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3785                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3786                         remaining_size -= 0x1000;
3787                 }
3788                 if (remaining_size)
3789                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3790 #else
3791                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3792 #endif
3793         }
3794
3795 #if __APPLE_
3796         /* check the stack is aligned */
3797         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3798         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3799         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3800         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3801         x86_breakpoint (code);
3802 #endif
3803
3804         /* compute max_offset in order to use short forward jumps */
3805         max_offset = 0;
3806         if (cfg->opt & MONO_OPT_BRANCH) {
3807                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3808                         MonoInst *ins;
3809                         bb->max_offset = max_offset;
3810
3811                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3812                                 max_offset += 6;
3813                         /* max alignment for loops */
3814                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3815                                 max_offset += LOOP_ALIGNMENT;
3816
3817                         MONO_INST_LIST_FOR_EACH_ENTRY (ins, &bb->ins_list, node) {
3818                                 if (ins->opcode == OP_LABEL)
3819                                         ins->inst_c1 = max_offset;
3820                                 
3821                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3822                         }
3823                 }
3824         }
3825
3826         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3827                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3828
3829         /* load arguments allocated to register from the stack */
3830         sig = mono_method_signature (method);
3831         pos = 0;
3832
3833         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3834                 inst = cfg->args [pos];
3835                 if (inst->opcode == OP_REGVAR) {
3836                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3837                         if (cfg->verbose_level > 2)
3838                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3839                 }
3840                 pos++;
3841         }
3842
3843         cfg->code_len = code - cfg->native_code;
3844
3845         return code;
3846 }
3847
3848 void
3849 mono_arch_emit_epilog (MonoCompile *cfg)
3850 {
3851         MonoMethod *method = cfg->method;
3852         MonoMethodSignature *sig = mono_method_signature (method);
3853         int quad, pos;
3854         guint32 stack_to_pop;
3855         guint8 *code;
3856         int max_epilog_size = 16;
3857         CallInfo *cinfo;
3858         
3859         if (cfg->method->save_lmf)
3860                 max_epilog_size += 128;
3861
3862         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3863                 cfg->code_size *= 2;
3864                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3865                 mono_jit_stats.code_reallocs++;
3866         }
3867
3868         code = cfg->native_code + cfg->code_len;
3869
3870         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3871                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3872
3873         /* the code restoring the registers must be kept in sync with OP_JMP */
3874         pos = 0;
3875         
3876         if (method->save_lmf) {
3877                 gint32 prev_lmf_reg;
3878                 gint32 lmf_offset = -sizeof (MonoLMF);
3879
3880                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3881                         /*
3882                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3883                          * through the mono_lmf_addr TLS variable.
3884                          */
3885                         /* reg = previous_lmf */
3886                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3887
3888                         /* lmf = previous_lmf */
3889                         x86_prefix (code, X86_GS_PREFIX);
3890                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
3891                 } else {
3892                         /* Find a spare register */
3893                         switch (mono_type_get_underlying_type (sig->ret)->type) {
3894                         case MONO_TYPE_I8:
3895                         case MONO_TYPE_U8:
3896                                 prev_lmf_reg = X86_EDI;
3897                                 cfg->used_int_regs |= (1 << X86_EDI);
3898                                 break;
3899                         default:
3900                                 prev_lmf_reg = X86_EDX;
3901                                 break;
3902                         }
3903
3904                         /* reg = previous_lmf */
3905                         x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3906
3907                         /* ecx = lmf */
3908                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
3909
3910                         /* *(lmf) = previous_lmf */
3911                         x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3912                 }
3913
3914                 /* restore caller saved regs */
3915                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3916                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
3917                 }
3918
3919                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3920                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
3921                 }
3922                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3923                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
3924                 }
3925
3926                 /* EBP is restored by LEAVE */
3927         } else {
3928                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3929                         pos -= 4;
3930                 }
3931                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3932                         pos -= 4;
3933                 }
3934                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3935                         pos -= 4;
3936                 }
3937
3938                 if (pos)
3939                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3940
3941                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3942                         x86_pop_reg (code, X86_ESI);
3943                 }
3944                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3945                         x86_pop_reg (code, X86_EDI);
3946                 }
3947                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3948                         x86_pop_reg (code, X86_EBX);
3949                 }
3950         }
3951
3952         /* Load returned vtypes into registers if needed */
3953         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
3954         if (cinfo->ret.storage == ArgValuetypeInReg) {
3955                 for (quad = 0; quad < 2; quad ++) {
3956                         switch (cinfo->ret.pair_storage [quad]) {
3957                         case ArgInIReg:
3958                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
3959                                 break;
3960                         case ArgOnFloatFpStack:
3961                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
3962                                 break;
3963                         case ArgOnDoubleFpStack:
3964                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
3965                                 break;
3966                         case ArgNone:
3967                                 break;
3968                         default:
3969                                 g_assert_not_reached ();
3970                         }
3971                 }
3972         }
3973
3974         x86_leave (code);
3975
3976         if (CALLCONV_IS_STDCALL (sig)) {
3977                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3978
3979                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3980         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
3981                 stack_to_pop = 4;
3982         else
3983                 stack_to_pop = 0;
3984
3985         if (stack_to_pop)
3986                 x86_ret_imm (code, stack_to_pop);
3987         else
3988                 x86_ret (code);
3989
3990         cfg->code_len = code - cfg->native_code;
3991
3992         g_assert (cfg->code_len < cfg->code_size);
3993 }
3994
/*
 * mono_arch_emit_exceptions:
 *
 *   Append the out-of-line exception throwing sequences to the native code of
 * CFG. Every MONO_PATCH_INFO_EXC entry in cfg->patch_info gets its branch
 * (located at patch_info->ip.i) redirected to a sequence emitted here.
 * Sequences for the same exception class are shared where possible.
 * cfg->native_code may be reallocated and cfg->code_len is updated.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
        MonoJumpInfo *patch_info;
        int nthrows, i;
        guint8 *code;
        /* At most 16 distinct throw sequences are remembered for sharing */
        MonoClass *exc_classes [16];
        guint8 *exc_throw_start [16], *exc_throw_end [16];
        guint32 code_size;
        int exc_count = 0;

        /* Compute needed space */
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                if (patch_info->type == MONO_PATCH_INFO_EXC)
                        exc_count++;
        }

        /* 
         * make sure we have enough space for exceptions
         * 16 is the size of two push_imm instructions and a call
         * (twice that amount is reserved per exception when compiling AOT)
         */
        if (cfg->compile_aot)
                code_size = exc_count * 32;
        else
                code_size = exc_count * 16;

        /* Grow the buffer by doubling until the estimate fits with 16 bytes to spare */
        while (cfg->code_len + code_size > (cfg->code_size - 16)) {
                cfg->code_size *= 2;
                cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
                mono_jit_stats.code_reallocs++;
        }

        /* Emission continues right after the code generated so far */
        code = cfg->native_code + cfg->code_len;

        nthrows = 0;
        for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
                switch (patch_info->type) {
                case MONO_PATCH_INFO_EXC: {
                        MonoClass *exc_class;
                        guint8 *buf, *buf2;
                        guint32 throw_ip;

                        /* Make the branch at the throw site jump to the sequence emitted below */
                        x86_patch (patch_info->ip.i + cfg->native_code, code);

                        exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
                        g_assert (exc_class);
                        /* Offset of the throw site from the start of the method's native code */
                        throw_ip = patch_info->ip.i;

                        /* Find a throw sequence for the same exception class */
                        for (i = 0; i < nthrows; ++i)
                                if (exc_classes [i] == exc_class)
                                        break;
                        if (i < nthrows) {
                                /*
                                 * Reuse the earlier sequence: push this site's offset (relative
                                 * to the end of that sequence) and jump to the shared part.
                                 * The patch entry is no longer needed.
                                 */
                                x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
                                x86_jump_code (code, exc_throw_start [i]);
                                patch_info->type = MONO_PATCH_INFO_NONE;
                        }
                        else {
                                guint32 size;

                                /* Compute size of code following the push <OFFSET> */
                                size = 5 + 5;

                                /*
                                 * Emit a placeholder push <OFFSET>; the real value is only known
                                 * after the call below has been emitted, so it is written at
                                 * 'buf' afterwards. 'buf2' marks the end of the placeholder.
                                 */
                                if ((code - cfg->native_code) - throw_ip < 126 - size) {
                                        /* Use the shorter form */
                                        buf = buf2 = code;
                                        x86_push_imm (code, 0);
                                }
                                else {
                                        buf = code;
                                        x86_push_imm (code, 0xf0f0f0f0);
                                        buf2 = code;
                                }

                                /* The shared part of the sequence starts after the push <OFFSET> */
                                if (nthrows < 16) {
                                        exc_classes [nthrows] = exc_class;
                                        exc_throw_start [nthrows] = code;
                                }

                                x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
                                /* Turn the patch entry into a call to the throw helper at the call emitted next */
                                patch_info->data.name = "mono_arch_throw_corlib_exception";
                                patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
                                patch_info->ip.i = code - cfg->native_code;
                                x86_call_code (code, 0);
                                /* Back-fill the placeholder with the offset of the code after the call */
                                x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
                                /* Pad any bytes of the placeholder left over by the final push */
                                while (buf < buf2)
                                        x86_nop (buf);

                                if (nthrows < 16) {
                                        exc_throw_end [nthrows] = code;
                                        nthrows ++;
                                }
                        }
                        break;
                }
                default:
                        /* do nothing */
                        break;
                }
        }

        cfg->code_len = code - cfg->native_code;

        g_assert (cfg->code_len < cfg->code_size);
}
4100
4101 void
4102 mono_arch_flush_icache (guint8 *code, gint size)
4103 {
4104         /* not needed */
4105 }
4106
/*
 * mono_arch_flush_register_windows:
 *
 *   Intentionally empty in this backend.
 */
void
mono_arch_flush_register_windows (void)
{
        return;
}
4111
4112 /*
4113  * Support for fast access to the thread-local lmf structure using the GS
4114  * segment register on NPTL + kernel 2.6.x.
4115  */
4116
4117 static gboolean tls_offset_inited = FALSE;
4118
4119 void
4120 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4121 {
4122         if (!tls_offset_inited) {
4123                 if (!getenv ("MONO_NO_TLS")) {
4124 #ifdef PLATFORM_WIN32
4125                         /* 
4126                          * We need to init this multiple times, since when we are first called, the key might not
4127                          * be initialized yet.
4128                          */
4129                         appdomain_tls_offset = mono_domain_get_tls_key ();
4130                         lmf_tls_offset = mono_get_jit_tls_key ();
4131                         thread_tls_offset = mono_thread_get_tls_key ();
4132
4133                         /* Only 64 tls entries can be accessed using inline code */
4134                         if (appdomain_tls_offset >= 64)
4135                                 appdomain_tls_offset = -1;
4136                         if (lmf_tls_offset >= 64)
4137                                 lmf_tls_offset = -1;
4138                         if (thread_tls_offset >= 64)
4139                                 thread_tls_offset = -1;
4140 #else
4141 #if MONO_XEN_OPT
4142                         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
4143 #endif
4144                         tls_offset_inited = TRUE;
4145                         appdomain_tls_offset = mono_domain_get_tls_offset ();
4146                         lmf_tls_offset = mono_get_lmf_tls_offset ();
4147                         lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
4148                         thread_tls_offset = mono_thread_get_tls_offset ();
4149 #endif
4150                 }
4151         }               
4152 }
4153
4154 void
4155 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4156 {
4157 }
4158
4159 void
4160 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4161 {
4162         MonoCallInst *call = (MonoCallInst*)inst;
4163         CallInfo *cinfo = get_call_info (cfg, cfg->mempool, inst->signature, FALSE);
4164
4165         /* add the this argument */
4166         if (this_reg != -1) {
4167                 if (cinfo->args [0].storage == ArgInIReg) {
4168                         MonoInst *this;
4169                         MONO_INST_NEW (cfg, this, OP_MOVE);
4170                         this->type = this_type;
4171                         this->sreg1 = this_reg;
4172                         this->dreg = mono_regstate_next_int (cfg->rs);
4173                         mono_bblock_add_inst (cfg->cbb, this);
4174
4175                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
4176                 }
4177                 else {
4178                         MonoInst *this;
4179                         MONO_INST_NEW (cfg, this, OP_OUTARG);
4180                         this->type = this_type;
4181                         this->sreg1 = this_reg;
4182                         mono_bblock_add_inst (cfg->cbb, this);
4183                 }
4184         }
4185
4186         if (vt_reg != -1) {
4187                 MonoInst *vtarg;
4188
4189                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4190                         /*
4191                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4192                          * the stack. Save the address here, so the call instruction can
4193                          * access it.
4194                          */
4195                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4196                         vtarg->inst_destbasereg = X86_ESP;
4197                         vtarg->inst_offset = inst->stack_usage;
4198                         vtarg->sreg1 = vt_reg;
4199                         mono_bblock_add_inst (cfg->cbb, vtarg);
4200                 }
4201                 else if (cinfo->ret.storage == ArgInIReg) {
4202                         /* The return address is passed in a register */
4203                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
4204                         vtarg->sreg1 = vt_reg;
4205                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
4206                         mono_bblock_add_inst (cfg->cbb, vtarg);
4207
4208                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
4209                 } else {
4210                         MonoInst *vtarg;
4211                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4212                         vtarg->type = STACK_MP;
4213                         vtarg->sreg1 = vt_reg;
4214                         mono_bblock_add_inst (cfg->cbb, vtarg);
4215                 }
4216         }
4217 }
4218
4219 #ifdef MONO_ARCH_HAVE_IMT
4220
4221 // Linear handler, the bsearch head compare is shorter
4222 //[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
4223 //[1 + 1] x86_branch8(inst,cond,imm,is_signed)
4224 //        x86_patch(ins,target)
4225 //[1 + 5] x86_jump_mem(inst,mem)
4226
/* Bytes reserved for a compare of the IMT register with an immediate ([2 + 4]) */
#define CMP_SIZE 6
/* Bytes reserved for a branch with an 8 bit displacement ([1 + 1]) */
#define BR_SMALL_SIZE 2
/*
 * Bytes reserved for the branch of a non-equals (range check) item.
 * NOTE(review): that branch may be emitted with x86_branch32 (); confirm that
 * its encoding really fits into 5 bytes.
 */
#define BR_LARGE_SIZE 5
/* Bytes reserved for an indirect jump through a memory location ([1 + 5]) */
#define JUMP_IMM_SIZE 6
/* Set to 1 to emit a check + breakpoint for a wrong method in the fall-through item */
#define ENABLE_WRONG_METHOD_CHECK 0
4232
4233 static int
4234 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
4235 {
4236         int i, distance = 0;
4237         for (i = start; i < target; ++i)
4238                 distance += imt_entries [i]->chunk_size;
4239         return distance;
4240 }
4241
/*
 * mono_arch_build_imt_thunk:
 *
 *   Generate the IMT thunk for VTABLE out of the COUNT check items in
 * IMT_ENTRIES. The thunk compares MONO_ARCH_IMT_REG against the method of each
 * item and jumps through the matching vtable slot. The code is allocated from
 * domain->code_mp and its start address is returned.
 *
 * LOCKING: called with the domain lock held
 */
gpointer
mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
{
        int i;
        int size = 0;
        guint8 *code, *start;

        /*
         * First pass: compute the size of the code emitted for each item and
         * the total size of the thunk.
         * NOTE(review): chunk_size is only ever incremented here, so it is
         * presumably zero on entry -- confirm against the caller.
         */
        for (i = 0; i < count; ++i) {
                MonoIMTCheckItem *item = imt_entries [i];
                if (item->is_equals) {
                        if (item->check_target_idx) {
                                /* [compare], branch to the next item if not equal, jump through the slot */
                                if (!item->compare_done)
                                        item->chunk_size += CMP_SIZE;
                                item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
                        } else {
                                /* Fall-through item: unconditional jump through the slot */
                                item->chunk_size += JUMP_IMM_SIZE;
#if ENABLE_WRONG_METHOD_CHECK
                                item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
#endif
                        }
                } else {
                        /* Range check: compare + branch; the branch target can then skip its own compare */
                        item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
                        imt_entries [item->check_target_idx]->compare_done = TRUE;
                }
                size += item->chunk_size;
        }
        code = mono_code_manager_reserve (domain->code_mp, size);
        start = code;
        /* Second pass: emit the code, remembering the branches which need patching */
        for (i = 0; i < count; ++i) {
                MonoIMTCheckItem *item = imt_entries [i];
                item->code_target = code;
                if (item->is_equals) {
                        if (item->check_target_idx) {
                                if (!item->compare_done)
                                        x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
                                /* Branch target is filled in by the patching loop below */
                                item->jmp_code = code;
                                x86_branch8 (code, X86_CC_NE, 0, FALSE);
                                x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
                        } else {
                                /* set ENABLE_WRONG_METHOD_CHECK to 1 to assert on wrong method */
#if ENABLE_WRONG_METHOD_CHECK
                                x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
                                item->jmp_code = code;
                                x86_branch8 (code, X86_CC_NE, 0, FALSE);
#endif
                                x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
#if ENABLE_WRONG_METHOD_CHECK
                                x86_patch (item->jmp_code, code);
                                x86_breakpoint (code);
                                item->jmp_code = NULL;
#endif
                        }
                } else {
                        x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
                        item->jmp_code = code;
                        /* Use the short branch when the estimated distance to the target fits in 8 bits */
                        if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
                                x86_branch8 (code, X86_CC_GE, 0, FALSE);
                        else
                                x86_branch32 (code, X86_CC_GE, 0, FALSE);
                }
        }
        /* patch the branches to get to the target items */
        for (i = 0; i < count; ++i) {
                MonoIMTCheckItem *item = imt_entries [i];
                if (item->jmp_code) {
                        if (item->check_target_idx) {
                                x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
                        }
                }
        }
                
        mono_stats.imt_thunks_size += code - start;
        /* The emitted code must not exceed the space reserved in the first pass */
        g_assert (code - start <= size);
        return start;
}
4320
4321 MonoMethod*
4322 mono_arch_find_imt_method (gpointer *regs, guint8 *code)
4323 {
4324         return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
4325 }
4326
4327 MonoObject*
4328 mono_arch_find_this_argument (gpointer *regs, MonoMethod *method)
4329 {
4330         MonoMethodSignature *sig = mono_method_signature (method);
4331         CallInfo *cinfo = get_call_info (NULL, NULL, sig, FALSE);
4332         int this_argument_offset;
4333         MonoObject *this_argument;
4334
4335         /* 
4336          * this is the offset of the this arg from esp as saved at the start of 
4337          * mono_arch_create_trampoline_code () in tramp-x86.c.
4338          */
4339         this_argument_offset = 5;
4340         if (MONO_TYPE_ISSTRUCT (sig->ret) && (cinfo->ret.storage == ArgOnStack))
4341                 this_argument_offset++;
4342
4343         this_argument = * (MonoObject**) (((guint8*) regs [X86_ESP]) + this_argument_offset * sizeof (gpointer));
4344
4345         g_free (cinfo);
4346         return this_argument;
4347 }
4348 #endif
4349
4350 MonoInst*
4351 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4352 {
4353         MonoInst *ins = NULL;
4354
4355         if (cmethod->klass == mono_defaults.math_class) {
4356                 if (strcmp (cmethod->name, "Sin") == 0) {
4357                         MONO_INST_NEW (cfg, ins, OP_SIN);
4358                         ins->inst_i0 = args [0];
4359                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4360                         MONO_INST_NEW (cfg, ins, OP_COS);
4361                         ins->inst_i0 = args [0];
4362                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4363                         MONO_INST_NEW (cfg, ins, OP_TAN);
4364                         ins->inst_i0 = args [0];
4365                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4366                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4367                         ins->inst_i0 = args [0];
4368                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4369                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4370                         ins->inst_i0 = args [0];
4371                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4372                         MONO_INST_NEW (cfg, ins, OP_ABS);
4373                         ins->inst_i0 = args [0];
4374                 }
4375 #if 0
4376                 /* OP_FREM is not IEEE compatible */
4377                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4378                         MONO_INST_NEW (cfg, ins, OP_FREM);
4379                         ins->inst_i0 = args [0];
4380                         ins->inst_i1 = args [1];
4381                 }
4382 #endif
4383         }
4384
4385         return ins;
4386 }
4387
4388
4389 gboolean
4390 mono_arch_print_tree (MonoInst *tree, int arity)
4391 {
4392         return 0;
4393 }
4394
4395 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4396 {
4397         MonoInst* ins;
4398         
4399         if (appdomain_tls_offset == -1)
4400                 return NULL;
4401
4402         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4403         ins->inst_offset = appdomain_tls_offset;
4404         return ins;
4405 }
4406
4407 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4408 {
4409         MonoInst* ins;
4410
4411         if (thread_tls_offset == -1)
4412                 return NULL;
4413
4414         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4415         ins->inst_offset = thread_tls_offset;
4416         return ins;
4417 }
4418
4419 guint32
4420 mono_arch_get_patch_offset (guint8 *code)
4421 {
4422         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4423                 return 2;
4424         else if ((code [0] == 0xba))
4425                 return 1;
4426         else if ((code [0] == 0x68))
4427                 /* push IMM */
4428                 return 1;
4429         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4430                 /* push <OFFSET>(<REG>) */
4431                 return 2;
4432         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4433                 /* call *<OFFSET>(<REG>) */
4434                 return 2;
4435         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4436                 /* fldl <ADDR> */
4437                 return 2;
4438         else if ((code [0] == 0x58) && (code [1] == 0x05))
4439                 /* pop %eax; add <OFFSET>, %eax */
4440                 return 2;
4441         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4442                 /* pop <REG>; add <OFFSET>, <REG> */
4443                 return 3;
4444         else {
4445                 g_assert_not_reached ();
4446                 return -1;
4447         }
4448 }
4449
4450 gboolean
4451 mono_breakpoint_clean_code (guint8 *code, guint8 *buf, int size)
4452 {
4453         int i;
4454         gboolean can_write = TRUE;
4455         memcpy (buf, code, size);
4456         for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
4457                 int idx = mono_breakpoint_info_index [i];
4458                 guint8 *ptr;
4459                 if (idx < 1)
4460                         continue;
4461                 ptr = mono_breakpoint_info [idx].address;
4462                 if (ptr >= code && ptr < code + size) {
4463                         guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
4464                         can_write = FALSE;
4465                         /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
4466                         buf [ptr - code] = saved_byte;
4467                 }
4468         }
4469         return can_write;
4470 }
4471
/*
 * mono_arch_get_vcall_slot:
 *
 *   Decode the indirect call instruction which ends at CODE and return the
 * value of its base register taken from REGS, storing the displacement of the
 * call into DISPLACEMENT. Returns NULL if the bytes before CODE are not one of
 * the recognized indirect call sequences.
 */
gpointer
mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
{
        guint8 buf [8];
        guint8 reg = 0;
        gint32 disp = 0;

        /* Work on a copy of the 8 bytes before CODE with breakpoints removed */
        mono_breakpoint_clean_code (code - 8, buf, sizeof (buf));
        code = buf + 8;

        *displacement = 0;

        /* go to the start of the call instruction
         *
         * address_byte = (m << 6) | (o << 3) | reg
         * call opcode: 0xff address_byte displacement
         * 0xff m=1,o=2 imm8
         * 0xff m=2,o=2 imm32
         */
        /* After this, code [-2] .. code [5] cover exactly buf [0] .. buf [7] */
        code -= 6;

        /* 
         * A given byte sequence can match more than one case here, so we have to be
         * really careful about the ordering of the cases. Longer sequences
         * come first.
         */
        if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
                /*
                 * This is an interface call
                 * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
                 * ff 10                   call   *(%eax)
                 */
                reg = x86_modrm_rm (code [5]);
                disp = 0;
#ifdef MONO_ARCH_HAVE_IMT
        } else if ((code [-2] == 0xba) && (code [3] == 0xff) && (x86_modrm_mod (code [4]) == 1) && (x86_modrm_reg (code [4]) == 2) && ((signed char)code [5] < 0)) {
                /* IMT-based interface calls: with MONO_ARCH_IMT_REG == edx
                 * ba 14 f8 28 08          mov    $0x828f814,%edx
                 * ff 50 fc                call   *0xfffffffc(%eax)
                 */
                reg = code [4] & 0x07;
                disp = (signed char)code [5];
#endif
        } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
                /* call *<imm8>(<REG>), as long as it is not the tail of a 0xe8 (direct call) sequence */
                reg = code [4] & 0x07;
                disp = (signed char)code [5];
        } else {
                if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
                        /* call *<imm32>(<REG>) */
                        reg = code [1] & 0x07;
                        disp = *((gint32*)(code + 2));
                } else if ((code [1] == 0xe8)) {
                        /* 0xe8 followed by a 32 bit operand: not an indirect call, so there is no slot */
                        return NULL;
                } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
                        /*
                         * This is an interface call
                         * 8b 40 30   mov    0x30(%eax),%eax
                         * ff 10      call   *(%eax)
                         */
                        disp = 0;
                        reg = code [5] & 0x07;
                }
                else
                        return NULL;
        }

        *displacement = disp;
        return regs [reg];
}
4540
4541 gpointer*
4542 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
4543 {
4544         gpointer vt;
4545         int displacement;
4546         vt = mono_arch_get_vcall_slot (code, regs, &displacement);
4547         if (!vt)
4548                 return NULL;
4549         return (gpointer*)((char*)vt + displacement);
4550 }
4551
4552 gpointer
4553 mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code)
4554 {
4555         guint32 esp = regs [X86_ESP];
4556         CallInfo *cinfo;
4557         gpointer res;
4558
4559         cinfo = get_call_info (NULL, NULL, sig, FALSE);
4560
4561         /*
4562          * The stack looks like:
4563          * <other args>
4564          * <this=delegate>
4565          * <possible vtype return address>
4566          * <return addr>
4567          * <4 pointers pushed by mono_arch_create_trampoline_code ()>
4568          */
4569         res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]);
4570         g_free (cinfo);
4571         return res;
4572 }
4573
4574 #define MAX_ARCH_DELEGATE_PARAMS 10
4575
4576 gpointer
4577 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
4578 {
4579         guint8 *code, *start;
4580
4581         if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
4582                 return NULL;
4583
4584         /* FIXME: Support more cases */
4585         if (MONO_TYPE_ISSTRUCT (sig->ret))
4586                 return NULL;
4587
4588         /*
4589          * The stack contains:
4590          * <delegate>
4591          * <return addr>
4592          */
4593
4594         if (has_target) {
4595                 static guint8* cached = NULL;
4596                 mono_mini_arch_lock ();
4597                 if (cached) {
4598                         mono_mini_arch_unlock ();
4599                         return cached;
4600                 }
4601                 
4602                 start = code = mono_global_codeman_reserve (64);
4603
4604                 /* Replace the this argument with the target */
4605                 x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
4606                 x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
4607                 x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
4608                 x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4609
4610                 g_assert ((code - start) < 64);
4611
4612                 cached = start;
4613                 mono_debug_add_delegate_trampoline (start, code - start);
4614                 mono_mini_arch_unlock ();
4615         } else {
4616                 static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
4617                 int i = 0;
4618                 /* 8 for mov_reg and jump, plus 8 for each parameter */
4619                 int code_reserve = 8 + (sig->param_count * 8);
4620
4621                 for (i = 0; i < sig->param_count; ++i)
4622                         if (!mono_is_regsize_var (sig->params [i]))
4623                                 return NULL;
4624
4625                 mono_mini_arch_lock ();
4626                 code = cache [sig->param_count];
4627                 if (code) {
4628                         mono_mini_arch_unlock ();
4629                         return code;
4630                 }
4631
4632                 /*
4633                  * The stack contains:
4634                  * <args in reverse order>
4635                  * <delegate>
4636                  * <return addr>
4637                  *
4638                  * and we need:
4639                  * <args in reverse order>
4640                  * <return addr>
4641                  * 
4642                  * without unbalancing the stack.
4643                  * So move each arg up a spot in the stack (overwriting un-needed 'this' arg)
4644                  * and leaving original spot of first arg as placeholder in stack so
4645                  * when callee pops stack everything works.
4646                  */
4647
4648                 start = code = mono_global_codeman_reserve (code_reserve);
4649
4650                 /* store delegate for access to method_ptr */
4651                 x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);
4652
4653                 /* move args up */
4654                 for (i = 0; i < sig->param_count; ++i) {
4655                         x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
4656                         x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
4657                 }
4658
4659                 x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4660
4661                 g_assert ((code - start) < code_reserve);
4662
4663                 cache [sig->param_count] = start;
4664
4665                 mono_debug_add_delegate_trampoline (start, code - start);
4666                 mono_mini_arch_unlock ();
4667         }
4668
4669         return start;
4670 }