New tests, update
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17
18 #include <mono/metadata/appdomain.h>
19 #include <mono/metadata/debug-helpers.h>
20 #include <mono/metadata/threads.h>
21 #include <mono/metadata/profiler-private.h>
22 #include <mono/utils/mono-math.h>
23
24 #include "trace.h"
25 #include "mini-x86.h"
26 #include "inssel.h"
27 #include "cpu-x86.h"
28
29 /* On windows, these hold the key returned by TlsAlloc () */
30 static gint lmf_tls_offset = -1;
31 static gint lmf_addr_tls_offset = -1;
32 static gint appdomain_tls_offset = -1;
33 static gint thread_tls_offset = -1;
34
35 #ifdef MONO_XEN_OPT
36 static gboolean optimize_for_xen = TRUE;
37 #else
38 #define optimize_for_xen 0
39 #endif
40
41 #ifdef PLATFORM_WIN32
42 static gboolean is_win32 = TRUE;
43 #else
44 static gboolean is_win32 = FALSE;
45 #endif
46
47 /* This mutex protects architecture specific caches */
48 #define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
49 #define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
50 static CRITICAL_SECTION mini_arch_mutex;
51
/*
 * ALIGN_TO:
 * Round VAL up to the next multiple of ALIGN; the mask arithmetic requires
 * ALIGN to be a power of two. The result has type guint64.
 *
 * Both arguments are parenthesised before being cast so that expression
 * arguments are converted as a whole, and the mask is computed in 64 bits so
 * that a narrower unsigned ALIGN cannot clear the upper half of the result.
 */
#define ALIGN_TO(val,align) ((((guint64)(val)) + ((guint64)(align) - 1)) & ~((guint64)(align) - 1))
53
/* Added to an argument's CallInfo offset to get its displacement from the frame base register. */
#define ARGS_OFFSET 8

/*
 * CALLCONV_IS_STDCALL:
 * Non-zero when SIG (a pointer to a method signature) uses stdcall.
 * Under windows, the default pinvoke calling convention is stdcall as well.
 */
#ifndef PLATFORM_WIN32
#define CALLCONV_IS_STDCALL(sig) ((sig)->call_convention == MONO_CALL_STDCALL)
#else
#define CALLCONV_IS_STDCALL(sig) \
	(((sig)->call_convention == MONO_CALL_STDCALL) || \
	 ((sig)->pinvoke && ((sig)->call_convention == MONO_CALL_DEFAULT)))
#endif

/* Expands to an assertion failure; marks paths that have no implementation. */
#define NOT_IMPLEMENTED g_assert_not_reached ()
64
65 MonoBreakpointInfo
66 mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];
67
68 const char*
69 mono_arch_regname (int reg) {
70         switch (reg) {
71         case X86_EAX: return "%eax";
72         case X86_EBX: return "%ebx";
73         case X86_ECX: return "%ecx";
74         case X86_EDX: return "%edx";
75         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
76         case X86_EDI: return "%edi";
77         case X86_ESI: return "%esi";
78         }
79         return "unknown";
80 }
81
/*
 * mono_arch_fregname:
 * @reg: a floating point register number (ignored)
 *
 * Returns "unknown" for every REG.
 */
const char*
mono_arch_fregname (int reg)
{
	(void) reg;

	return "unknown";
}
86
/* Where a single argument or return value is located for a call. */
typedef enum {
	ArgInIReg = 0,		/* in the integer register given by ArgInfo.reg */
	ArgInFloatSSEReg = 1,	/* single precision value in a float register (see add_float) */
	ArgInDoubleSSEReg = 2,	/* double precision value in a float register (see add_float) */
	ArgOnStack = 3,		/* on the stack at ArgInfo.offset */
	ArgValuetypeInReg = 4,	/* struct described by ArgInfo.pair_storage/pair_regs */
	ArgOnFloatFpStack = 5,	/* used for R4 return values */
	ArgOnDoubleFpStack = 6,	/* used for R8 return values */
	ArgNone = 7		/* nothing: void return or unused pair slot */
} ArgStorage;
97
98 typedef struct {
99         gint16 offset;
100         gint8  reg;
101         ArgStorage storage;
102
103         /* Only if storage == ArgValuetypeInReg */
104         ArgStorage pair_storage [2];
105         gint8 pair_regs [2];
106 } ArgInfo;
107
108 typedef struct {
109         int nargs;
110         guint32 stack_usage;
111         guint32 reg_usage;
112         guint32 freg_usage;
113         gboolean need_stack_align;
114         guint32 stack_align_amount;
115         ArgInfo ret;
116         ArgInfo sig_cookie;
117         ArgInfo args [1];
118 } CallInfo;
119
120 #define PARAM_REGS 0
121
122 #define FLOAT_PARAM_REGS 0
123
124 static X86_Reg_No param_regs [] = { 0 };
125
126 #if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
127 #define SMALL_STRUCTS_IN_REGS
128 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
129 #endif
130
131 static void inline
132 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
133 {
134     ainfo->offset = *stack_size;
135
136     if (*gr >= PARAM_REGS) {
137                 ainfo->storage = ArgOnStack;
138                 (*stack_size) += sizeof (gpointer);
139     }
140     else {
141                 ainfo->storage = ArgInIReg;
142                 ainfo->reg = param_regs [*gr];
143                 (*gr) ++;
144     }
145 }
146
147 static void inline
148 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
149 {
150         ainfo->offset = *stack_size;
151
152         g_assert (PARAM_REGS == 0);
153         
154         ainfo->storage = ArgOnStack;
155         (*stack_size) += sizeof (gpointer) * 2;
156 }
157
158 static void inline
159 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
160 {
161     ainfo->offset = *stack_size;
162
163     if (*gr >= FLOAT_PARAM_REGS) {
164                 ainfo->storage = ArgOnStack;
165                 (*stack_size) += is_double ? 8 : 4;
166     }
167     else {
168                 /* A double register */
169                 if (is_double)
170                         ainfo->storage = ArgInDoubleSSEReg;
171                 else
172                         ainfo->storage = ArgInFloatSSEReg;
173                 ainfo->reg = *gr;
174                 (*gr) += 1;
175     }
176 }
177
178
179 static void
180 add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
181                gboolean is_return,
182                guint32 *gr, guint32 *fr, guint32 *stack_size)
183 {
184         guint32 size;
185         MonoClass *klass;
186
187         klass = mono_class_from_mono_type (type);
188         if (sig->pinvoke) 
189                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
190         else 
191                 size = mini_type_stack_size (gsctx, &klass->byval_arg, NULL);
192
193 #ifdef SMALL_STRUCTS_IN_REGS
194         if (sig->pinvoke && is_return) {
195                 MonoMarshalType *info;
196
197                 /*
198                  * the exact rules are not very well documented, the code below seems to work with the 
199                  * code generated by gcc 3.3.3 -mno-cygwin.
200                  */
201                 info = mono_marshal_load_type_info (klass);
202                 g_assert (info);
203
204                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
205
206                 /* Special case structs with only a float member */
207                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
208                         ainfo->storage = ArgValuetypeInReg;
209                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
210                         return;
211                 }
212                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
213                         ainfo->storage = ArgValuetypeInReg;
214                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
215                         return;
216                 }               
217                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
218                         ainfo->storage = ArgValuetypeInReg;
219                         ainfo->pair_storage [0] = ArgInIReg;
220                         ainfo->pair_regs [0] = return_regs [0];
221                         if (info->native_size > 4) {
222                                 ainfo->pair_storage [1] = ArgInIReg;
223                                 ainfo->pair_regs [1] = return_regs [1];
224                         }
225                         return;
226                 }
227         }
228 #endif
229
230         ainfo->offset = *stack_size;
231         ainfo->storage = ArgOnStack;
232         *stack_size += ALIGN_TO (size, sizeof (gpointer));
233 }
234
235 /*
236  * get_call_info:
237  *
238  *  Obtain information about a call according to the calling convention.
239  * For x86 ELF, see the "System V Application Binary Interface Intel386 
240  * Architecture Processor Supplment, Fourth Edition" document for more
241  * information.
242  * For x86 win32, see ???.
243  */
244 static CallInfo*
245 get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
246 {
247         guint32 i, gr, fr;
248         MonoType *ret_type;
249         int n = sig->hasthis + sig->param_count;
250         guint32 stack_size = 0;
251         CallInfo *cinfo;
252         MonoGenericSharingContext *gsctx = cfg ? cfg->generic_sharing_context : NULL;
253
254         if (mp)
255                 cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
256         else
257                 cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
258
259         gr = 0;
260         fr = 0;
261
262         /* return value */
263         {
264                 ret_type = mono_type_get_underlying_type (sig->ret);
265                 ret_type = mini_get_basic_type_from_generic (gsctx, ret_type);
266                 switch (ret_type->type) {
267                 case MONO_TYPE_BOOLEAN:
268                 case MONO_TYPE_I1:
269                 case MONO_TYPE_U1:
270                 case MONO_TYPE_I2:
271                 case MONO_TYPE_U2:
272                 case MONO_TYPE_CHAR:
273                 case MONO_TYPE_I4:
274                 case MONO_TYPE_U4:
275                 case MONO_TYPE_I:
276                 case MONO_TYPE_U:
277                 case MONO_TYPE_PTR:
278                 case MONO_TYPE_FNPTR:
279                 case MONO_TYPE_CLASS:
280                 case MONO_TYPE_OBJECT:
281                 case MONO_TYPE_SZARRAY:
282                 case MONO_TYPE_ARRAY:
283                 case MONO_TYPE_STRING:
284                         cinfo->ret.storage = ArgInIReg;
285                         cinfo->ret.reg = X86_EAX;
286                         break;
287                 case MONO_TYPE_U8:
288                 case MONO_TYPE_I8:
289                         cinfo->ret.storage = ArgInIReg;
290                         cinfo->ret.reg = X86_EAX;
291                         break;
292                 case MONO_TYPE_R4:
293                         cinfo->ret.storage = ArgOnFloatFpStack;
294                         break;
295                 case MONO_TYPE_R8:
296                         cinfo->ret.storage = ArgOnDoubleFpStack;
297                         break;
298                 case MONO_TYPE_GENERICINST:
299                         if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
300                                 cinfo->ret.storage = ArgInIReg;
301                                 cinfo->ret.reg = X86_EAX;
302                                 break;
303                         }
304                         /* Fall through */
305                 case MONO_TYPE_VALUETYPE: {
306                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
307
308                         add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
309                         if (cinfo->ret.storage == ArgOnStack)
310                                 /* The caller passes the address where the value is stored */
311                                 add_general (&gr, &stack_size, &cinfo->ret);
312                         break;
313                 }
314                 case MONO_TYPE_TYPEDBYREF:
315                         /* Same as a valuetype with size 24 */
316                         add_general (&gr, &stack_size, &cinfo->ret);
317                         ;
318                         break;
319                 case MONO_TYPE_VOID:
320                         cinfo->ret.storage = ArgNone;
321                         break;
322                 default:
323                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
324                 }
325         }
326
327         /* this */
328         if (sig->hasthis)
329                 add_general (&gr, &stack_size, cinfo->args + 0);
330
331         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
332                 gr = PARAM_REGS;
333                 fr = FLOAT_PARAM_REGS;
334                 
335                 /* Emit the signature cookie just before the implicit arguments */
336                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
337         }
338
339         for (i = 0; i < sig->param_count; ++i) {
340                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
341                 MonoType *ptype;
342
343                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
344                         /* We allways pass the sig cookie on the stack for simplicity */
345                         /* 
346                          * Prevent implicit arguments + the sig cookie from being passed 
347                          * in registers.
348                          */
349                         gr = PARAM_REGS;
350                         fr = FLOAT_PARAM_REGS;
351
352                         /* Emit the signature cookie just before the implicit arguments */
353                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
354                 }
355
356                 if (sig->params [i]->byref) {
357                         add_general (&gr, &stack_size, ainfo);
358                         continue;
359                 }
360                 ptype = mono_type_get_underlying_type (sig->params [i]);
361                 ptype = mini_get_basic_type_from_generic (gsctx, ptype);
362                 switch (ptype->type) {
363                 case MONO_TYPE_BOOLEAN:
364                 case MONO_TYPE_I1:
365                 case MONO_TYPE_U1:
366                         add_general (&gr, &stack_size, ainfo);
367                         break;
368                 case MONO_TYPE_I2:
369                 case MONO_TYPE_U2:
370                 case MONO_TYPE_CHAR:
371                         add_general (&gr, &stack_size, ainfo);
372                         break;
373                 case MONO_TYPE_I4:
374                 case MONO_TYPE_U4:
375                         add_general (&gr, &stack_size, ainfo);
376                         break;
377                 case MONO_TYPE_I:
378                 case MONO_TYPE_U:
379                 case MONO_TYPE_PTR:
380                 case MONO_TYPE_FNPTR:
381                 case MONO_TYPE_CLASS:
382                 case MONO_TYPE_OBJECT:
383                 case MONO_TYPE_STRING:
384                 case MONO_TYPE_SZARRAY:
385                 case MONO_TYPE_ARRAY:
386                         add_general (&gr, &stack_size, ainfo);
387                         break;
388                 case MONO_TYPE_GENERICINST:
389                         if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
390                                 add_general (&gr, &stack_size, ainfo);
391                                 break;
392                         }
393                         /* Fall through */
394                 case MONO_TYPE_VALUETYPE:
395                         add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
396                         break;
397                 case MONO_TYPE_TYPEDBYREF:
398                         stack_size += sizeof (MonoTypedRef);
399                         ainfo->storage = ArgOnStack;
400                         break;
401                 case MONO_TYPE_U8:
402                 case MONO_TYPE_I8:
403                         add_general_pair (&gr, &stack_size, ainfo);
404                         break;
405                 case MONO_TYPE_R4:
406                         add_float (&fr, &stack_size, ainfo, FALSE);
407                         break;
408                 case MONO_TYPE_R8:
409                         add_float (&fr, &stack_size, ainfo, TRUE);
410                         break;
411                 default:
412                         g_error ("unexpected type 0x%x", ptype->type);
413                         g_assert_not_reached ();
414                 }
415         }
416
417         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
418                 gr = PARAM_REGS;
419                 fr = FLOAT_PARAM_REGS;
420                 
421                 /* Emit the signature cookie just before the implicit arguments */
422                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
423         }
424
425 #if defined(__APPLE__)
426         if ((stack_size % 16) != 0) { 
427                 cinfo->need_stack_align = TRUE;
428                 stack_size += cinfo->stack_align_amount = 16-(stack_size % 16);
429         }
430 #endif
431
432         cinfo->stack_usage = stack_size;
433         cinfo->reg_usage = gr;
434         cinfo->freg_usage = fr;
435         return cinfo;
436 }
437
438 /*
439  * mono_arch_get_argument_info:
440  * @csig:  a method signature
441  * @param_count: the number of parameters to consider
442  * @arg_info: an array to store the result infos
443  *
444  * Gathers information on parameters such as size, alignment and
445  * padding. arg_info should be large enought to hold param_count + 1 entries. 
446  *
447  * Returns the size of the activation frame.
448  */
449 int
450 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
451 {
452         int k, frame_size = 0;
453         int size, pad;
454         guint32 align;
455         int offset = 8;
456         CallInfo *cinfo;
457
458         cinfo = get_call_info (NULL, NULL, csig, FALSE);
459
460         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
461                 frame_size += sizeof (gpointer);
462                 offset += 4;
463         }
464
465         arg_info [0].offset = offset;
466
467         if (csig->hasthis) {
468                 frame_size += sizeof (gpointer);
469                 offset += 4;
470         }
471
472         arg_info [0].size = frame_size;
473
474         for (k = 0; k < param_count; k++) {
475                 
476                 if (csig->pinvoke)
477                         size = mono_type_native_stack_size (csig->params [k], &align);
478                 else {
479                         int ialign;
480                         size = mini_type_stack_size (NULL, csig->params [k], &ialign);
481                         align = ialign;
482                 }
483
484                 /* ignore alignment for now */
485                 align = 1;
486
487                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
488                 arg_info [k].pad = pad;
489                 frame_size += size;
490                 arg_info [k + 1].pad = 0;
491                 arg_info [k + 1].size = size;
492                 offset += pad;
493                 arg_info [k + 1].offset = offset;
494                 offset += size;
495         }
496
497         align = MONO_ARCH_FRAME_ALIGNMENT;
498         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
499         arg_info [k].pad = pad;
500
501         g_free (cinfo);
502
503         return frame_size;
504 }
505
506 static const guchar cpuid_impl [] = {
507         0x55,                           /* push   %ebp */
508         0x89, 0xe5,                     /* mov    %esp,%ebp */
509         0x53,                           /* push   %ebx */
510         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
511         0x0f, 0xa2,                     /* cpuid   */
512         0x50,                           /* push   %eax */
513         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
514         0x89, 0x18,                     /* mov    %ebx,(%eax) */
515         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
516         0x89, 0x08,                     /* mov    %ecx,(%eax) */
517         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
518         0x89, 0x10,                     /* mov    %edx,(%eax) */
519         0x58,                           /* pop    %eax */
520         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
521         0x89, 0x02,                     /* mov    %eax,(%edx) */
522         0x5b,                           /* pop    %ebx */
523         0xc9,                           /* leave   */
524         0xc3,                           /* ret     */
525 };
526
527 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
528
529 static int 
530 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
531 {
532         int have_cpuid = 0;
533 #ifndef _MSC_VER
534         __asm__  __volatile__ (
535                 "pushfl\n"
536                 "popl %%eax\n"
537                 "movl %%eax, %%edx\n"
538                 "xorl $0x200000, %%eax\n"
539                 "pushl %%eax\n"
540                 "popfl\n"
541                 "pushfl\n"
542                 "popl %%eax\n"
543                 "xorl %%edx, %%eax\n"
544                 "andl $0x200000, %%eax\n"
545                 "movl %%eax, %0"
546                 : "=r" (have_cpuid)
547                 :
548                 : "%eax", "%edx"
549         );
550 #else
551         __asm {
552                 pushfd
553                 pop eax
554                 mov edx, eax
555                 xor eax, 0x200000
556                 push eax
557                 popfd
558                 pushfd
559                 pop eax
560                 xor eax, edx
561                 and eax, 0x200000
562                 mov have_cpuid, eax
563         }
564 #endif
565         if (have_cpuid) {
566                 /* Have to use the code manager to get around WinXP DEP */
567                 static CpuidFunc func = NULL;
568                 void *ptr;
569                 if (!func) {
570                         ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
571                         memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
572                         func = (CpuidFunc)ptr;
573                 }
574                 func (id, p_eax, p_ebx, p_ecx, p_edx);
575
576                 /*
577                  * We use this approach because of issues with gcc and pic code, see:
578                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
579                 __asm__ __volatile__ ("cpuid"
580                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
581                         : "a" (id));
582                 */
583                 return 1;
584         }
585         return 0;
586 }
587
588 /*
589  * Initialize the cpu to execute managed code.
590  */
591 void
592 mono_arch_cpu_init (void)
593 {
594         /* spec compliance requires running with double precision */
595 #ifndef _MSC_VER
596         guint16 fpcw;
597
598         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
599         fpcw &= ~X86_FPCW_PRECC_MASK;
600         fpcw |= X86_FPCW_PREC_DOUBLE;
601         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
602         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
603 #else
604         _control87 (_PC_53, MCW_PC);
605 #endif
606 }
607
608 /*
609  * Initialize architecture specific code.
610  */
611 void
612 mono_arch_init (void)
613 {
614         InitializeCriticalSection (&mini_arch_mutex);
615 }
616
617 /*
618  * Cleanup architecture specific code.
619  */
620 void
621 mono_arch_cleanup (void)
622 {
623         DeleteCriticalSection (&mini_arch_mutex);
624 }
625
626 /*
627  * This function returns the optimizations supported on this cpu.
628  */
629 guint32
630 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
631 {
632         int eax, ebx, ecx, edx;
633         guint32 opts = 0;
634         
635         *exclude_mask = 0;
636         /* Feature Flags function, flags returned in EDX. */
637         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
638                 if (edx & (1 << 15)) {
639                         opts |= MONO_OPT_CMOV;
640                         if (edx & 1)
641                                 opts |= MONO_OPT_FCMOV;
642                         else
643                                 *exclude_mask |= MONO_OPT_FCMOV;
644                 } else
645                         *exclude_mask |= MONO_OPT_CMOV;
646                 if (edx & (1 << 26))
647                         opts |= MONO_OPT_SSE2;
648                 else
649                         *exclude_mask |= MONO_OPT_SSE2;
650         }
651         return opts;
652 }
653
654 /*
655  * Determine whenever the trap whose info is in SIGINFO is caused by
656  * integer overflow.
657  */
658 gboolean
659 mono_arch_is_int_overflow (void *sigctx, void *info)
660 {
661         MonoContext ctx;
662         guint8* ip;
663
664         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
665
666         ip = (guint8*)ctx.eip;
667
668         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
669                 gint32 reg;
670
671                 /* idiv REG */
672                 switch (x86_modrm_rm (ip [1])) {
673                 case X86_EAX:
674                         reg = ctx.eax;
675                         break;
676                 case X86_ECX:
677                         reg = ctx.ecx;
678                         break;
679                 case X86_EDX:
680                         reg = ctx.edx;
681                         break;
682                 case X86_EBX:
683                         reg = ctx.ebx;
684                         break;
685                 case X86_ESI:
686                         reg = ctx.esi;
687                         break;
688                 case X86_EDI:
689                         reg = ctx.edi;
690                         break;
691                 default:
692                         g_assert_not_reached ();
693                         reg = -1;
694                 }
695
696                 if (reg == -1)
697                         return TRUE;
698         }
699                         
700         return FALSE;
701 }
702
703 GList *
704 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
705 {
706         GList *vars = NULL;
707         int i;
708
709         for (i = 0; i < cfg->num_varinfo; i++) {
710                 MonoInst *ins = cfg->varinfo [i];
711                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
712
713                 /* unused vars */
714                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
715                         continue;
716
717                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
718                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
719                         continue;
720
721                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
722                  * 8bit quantities in caller saved registers on x86 */
723                 if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
724                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
725                         g_assert (i == vmv->idx);
726                         vars = g_list_prepend (vars, vmv);
727                 }
728         }
729
730         vars = mono_varlist_sort (cfg, vars, 0);
731
732         return vars;
733 }
734
735 GList *
736 mono_arch_get_global_int_regs (MonoCompile *cfg)
737 {
738         GList *regs = NULL;
739
740         /* we can use 3 registers for global allocation */
741         regs = g_list_prepend (regs, (gpointer)X86_EBX);
742         regs = g_list_prepend (regs, (gpointer)X86_ESI);
743         regs = g_list_prepend (regs, (gpointer)X86_EDI);
744
745         return regs;
746 }
747
748 /*
749  * mono_arch_regalloc_cost:
750  *
751  *  Return the cost, in number of memory references, of the action of 
752  * allocating the variable VMV into a register during global register
753  * allocation.
754  */
755 guint32
756 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
757 {
758         MonoInst *ins = cfg->varinfo [vmv->idx];
759
760         if (cfg->method->save_lmf)
761                 /* The register is already saved */
762                 return (ins->opcode == OP_ARG) ? 1 : 0;
763         else
764                 /* push+pop+possible load if it is an argument */
765                 return (ins->opcode == OP_ARG) ? 3 : 2;
766 }
767  
768 /*
769  * Set var information according to the calling convention. X86 version.
770  * The locals var stuff should most likely be split in another method.
771  */
772 void
773 mono_arch_allocate_vars (MonoCompile *cfg)
774 {
775         MonoMethodSignature *sig;
776         MonoMethodHeader *header;
777         MonoInst *inst;
778         guint32 locals_stack_size, locals_stack_align;
779         int i, offset;
780         gint32 *offsets;
781         CallInfo *cinfo;
782
783         header = mono_method_get_header (cfg->method);
784         sig = mono_method_signature (cfg->method);
785
786         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
787
788         cfg->frame_reg = MONO_ARCH_BASEREG;
789         offset = 0;
790
791         /* Reserve space to save LMF and caller saved registers */
792
793         if (cfg->method->save_lmf) {
794                 offset += sizeof (MonoLMF);
795         } else {
796                 if (cfg->used_int_regs & (1 << X86_EBX)) {
797                         offset += 4;
798                 }
799
800                 if (cfg->used_int_regs & (1 << X86_EDI)) {
801                         offset += 4;
802                 }
803
804                 if (cfg->used_int_regs & (1 << X86_ESI)) {
805                         offset += 4;
806                 }
807         }
808
809         switch (cinfo->ret.storage) {
810         case ArgValuetypeInReg:
811                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
812                 offset += 8;
813                 cfg->ret->opcode = OP_REGOFFSET;
814                 cfg->ret->inst_basereg = X86_EBP;
815                 cfg->ret->inst_offset = - offset;
816                 break;
817         default:
818                 break;
819         }
820
821         /* Allocate locals */
822         offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
823         if (locals_stack_align) {
824                 offset += (locals_stack_align - 1);
825                 offset &= ~(locals_stack_align - 1);
826         }
827         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
828                 if (offsets [i] != -1) {
829                         MonoInst *inst = cfg->varinfo [i];
830                         inst->opcode = OP_REGOFFSET;
831                         inst->inst_basereg = X86_EBP;
832                         inst->inst_offset = - (offset + offsets [i]);
833                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
834                 }
835         }
836         offset += locals_stack_size;
837
838
839         /*
840          * Allocate arguments+return value
841          */
842
843         switch (cinfo->ret.storage) {
844         case ArgOnStack:
845                 cfg->ret->opcode = OP_REGOFFSET;
846                 cfg->ret->inst_basereg = X86_EBP;
847                 cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
848                 break;
849         case ArgValuetypeInReg:
850                 break;
851         case ArgInIReg:
852                 cfg->ret->opcode = OP_REGVAR;
853                 cfg->ret->inst_c0 = cinfo->ret.reg;
854                 break;
855         case ArgNone:
856         case ArgOnFloatFpStack:
857         case ArgOnDoubleFpStack:
858                 break;
859         default:
860                 g_assert_not_reached ();
861         }
862
863         if (sig->call_convention == MONO_CALL_VARARG) {
864                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
865                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
866         }
867
868         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
869                 ArgInfo *ainfo = &cinfo->args [i];
870                 inst = cfg->args [i];
871                 if (inst->opcode != OP_REGVAR) {
872                         inst->opcode = OP_REGOFFSET;
873                         inst->inst_basereg = X86_EBP;
874                 }
875                 inst->inst_offset = ainfo->offset + ARGS_OFFSET;
876         }
877
878         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
879         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
880
881         cfg->stack_offset = offset;
882 }
883
884 void
885 mono_arch_create_vars (MonoCompile *cfg)
886 {
887         MonoMethodSignature *sig;
888         CallInfo *cinfo;
889
890         sig = mono_method_signature (cfg->method);
891
892         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
893
894         if (cinfo->ret.storage == ArgValuetypeInReg)
895                 cfg->ret_var_is_local = TRUE;
896 }
897
898 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
899  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
900  */
901
902 static void
903 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call)
904 {
905         MonoInst *arg;
906         MonoMethodSignature *tmp_sig;
907         MonoInst *sig_arg;
908
909         /* FIXME: Add support for signature tokens to AOT */
910         cfg->disable_aot = TRUE;
911         MONO_INST_NEW (cfg, arg, OP_OUTARG);
912
913         /*
914          * mono_ArgIterator_Setup assumes the signature cookie is 
915          * passed first and all the arguments which were before it are
916          * passed on the stack after the signature. So compensate by 
917          * passing a different signature.
918          */
919         tmp_sig = mono_metadata_signature_dup (call->signature);
920         tmp_sig->param_count -= call->signature->sentinelpos;
921         tmp_sig->sentinelpos = 0;
922         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
923
924         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
925         sig_arg->inst_p0 = tmp_sig;
926
927         arg->inst_left = sig_arg;
928         arg->type = STACK_PTR;
929         /* prepend, so they get reversed */
930         arg->next = call->out_args;
931         call->out_args = arg;
932 }
933
934 /*
935  * It is expensive to adjust esp for each individual fp argument pushed on the stack
936  * so we try to do it just once when we have multiple fp arguments in a row.
937  * We don't use this mechanism generally because for int arguments the generated code
938  * is slightly bigger and new generation cpus optimize away the dependency chains
939  * created by push instructions on the esp value.
940  * fp_arg_setup is the first argument in the execution sequence where the esp register
941  * is modified.
942  */
943 static int
944 collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
945 {
946         int fp_space = 0;
947         MonoType *t;
948
949         for (; start_arg < sig->param_count; ++start_arg) {
950                 t = mono_type_get_underlying_type (sig->params [start_arg]);
951                 if (!t->byref && t->type == MONO_TYPE_R8) {
952                         fp_space += sizeof (double);
953                         *fp_arg_setup = start_arg;
954                 } else {
955                         break;
956                 }
957         }
958         return fp_space;
959 }
960
961 /* 
962  * take the arguments and generate the arch-specific
963  * instructions to properly call the function in call.
964  * This includes pushing, moving arguments to the right register
965  * etc.
 *
 * For every argument except the first argument of a virtual call, an
 * OP_OUTARG* instruction wrapping call->args [i] is created and prepended
 * to call->out_args, so the list ends up in reverse argument order.
 * For non-pinvoke vararg signatures a signature cookie is emitted at the
 * sentinel position. call->stack_usage is set from the computed call info.
 * Returns call.
966  */
967 MonoCallInst*
968 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
969         MonoInst *arg, *in;
970         MonoMethodSignature *sig;
971         int i, n;
972         CallInfo *cinfo;
973         int sentinelpos = 0;
974         int fp_args_space = 0, fp_args_offset = 0, fp_arg_setup = -1;
975
976         sig = call->signature;
        /* n counts the explicit parameters plus the implicit this, if any */
977         n = sig->param_count + sig->hasthis;
978
979         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
980
        /*
         * For managed vararg calls, compute the argument index at which the
         * signature cookie is emitted; virtual calls shift it by one.
         */
981         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
982                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
983
984         for (i = 0; i < n; ++i) {
985                 ArgInfo *ainfo = cinfo->args + i;
986
987                 /* Emit the signature cookie just before the implicit arguments */
988                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
989                         emit_sig_cookie (cfg, call);
990                 }
991
992                 if (is_virtual && i == 0) {
993                         /* the argument will be attached to the call instruction */
994                         in = call->args [i];
995                 } else {
996                         MonoType *t;
997
                        /* The implicit this argument is treated as a native int */
998                         if (i >= sig->hasthis)
999                                 t = sig->params [i - sig->hasthis];
1000                         else
1001                                 t = &mono_defaults.int_class->byval_arg;
1002                         t = mono_type_get_underlying_type (t);
1003
1004                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1005                         in = call->args [i];
1006                         arg->cil_code = in->cil_code;
1007                         arg->inst_left = in;
1008                         arg->type = in->type;
1009                         /* prepend, so they get reversed */
1010                         arg->next = call->out_args;
1011                         call->out_args = arg;
1012
                        /*
                         * Value type argument: compute its stack size and pass it
                         * with OP_OUTARG_VT. align is filled in by the size helpers
                         * but is not otherwise read in this function.
                         */
1013                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
1014                                 guint32 size, align;
1015
1016                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
1017                                         size = sizeof (MonoTypedRef);
1018                                         align = sizeof (gpointer);
1019                                 }
1020                                 else
1021                                         if (sig->pinvoke)
1022                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
1023                                         else {
1024                                                 int ialign;
1025                                                 size = mini_type_stack_size (cfg->generic_sharing_context, &in->klass->byval_arg, &ialign);
1026                                                 align = ialign;
1027                                         }
1028                                 arg->opcode = OP_OUTARG_VT;
1029                                 arg->klass = in->klass;
1030                                 arg->backend.is_pinvoke = sig->pinvoke;
1031                                 arg->inst_imm = size; 
1032                         }
1033                         else {
                                /* Only stack-passed arguments are handled; anything else asserts */
1034                                 switch (ainfo->storage) {
1035                                 case ArgOnStack:
1036                                         arg->opcode = OP_OUTARG;
1037                                         if (!t->byref) {
1038                                                 if (t->type == MONO_TYPE_R4) {
1039                                                         arg->opcode = OP_OUTARG_R4;
1040                                                 } else if (t->type == MONO_TYPE_R8) {
1041                                                         arg->opcode = OP_OUTARG_R8;
1042                                                         /* we store in the upper bits of backend.arg_info the needed
1043                                                          * esp adjustment and in the lower bits the offset from esp
1044                                                          * where the arg needs to be stored
1045                                                          */
1046                                                         if (!fp_args_space) {
1047                                                                 fp_args_space = collect_fp_stack_space (sig, i - sig->hasthis, &fp_arg_setup);
1048                                                                 fp_args_offset = fp_args_space;
1049                                                         }
1050                                                         arg->backend.arg_info = fp_args_space - fp_args_offset;
1051                                                         fp_args_offset -= sizeof (double);
                                                        /* Only the argument recorded in fp_arg_setup carries the esp adjustment */
1052                                                         if (i - sig->hasthis == fp_arg_setup) {
1053                                                                 arg->backend.arg_info |= fp_args_space << 16;
1054                                                         }
1055                                                         if (fp_args_offset == 0) {
1056                                                                 /* the allocated esp stack is finished:
1057                                                                  * prepare for an eventual second run of fp args
1058                                                                  */
1059                                                                 fp_args_space = 0;
1060                                                         }
1061                                                 }
1062                                         }
1063                                         break;
1064                                 default:
1065                                         g_assert_not_reached ();
1066                                 }
1067                         }
1068                 }
1069         }
1070
1071         /* Handle the case where there are no implicit arguments */
1072         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
1073                 emit_sig_cookie (cfg, call);
1074         }
1075
1076         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
1077                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1078                         MonoInst *zero_inst;
1079                         /*
1080                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
1081                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
1082                          * before calling the function. So we add a dummy instruction to represent pushing the 
1083                          * struct return address to the stack. The return address will be saved to this stack slot 
1084                          * by the code emitted in this_vret_args.
1085                          */
1086                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
1087                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
1088                         zero_inst->inst_p0 = 0;
1089                         arg->inst_left = zero_inst;
1090                         arg->type = STACK_PTR;
1091                         /* prepend, so they get reversed */
1092                         arg->next = call->out_args;
1093                         call->out_args = arg;
1094                 }
1095                 else
1096                         /* if the function returns a struct, the called method already does a ret $0x4 */
                        /* NOTE: the test below repeats the enclosing condition, so it is always true here */
1097                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1098                                 cinfo->stack_usage -= 4;
1099         }
1100         
1101         call->stack_usage = cinfo->stack_usage;
1102
1103 #if defined(__APPLE__)
        /* On Apple targets, prepend a stack alignment pseudo-instruction when the call info requests it */
1104         if (cinfo->need_stack_align) {
1105                 MONO_INST_NEW (cfg, arg, OP_X86_OUTARG_ALIGN_STACK);
1106                 arg->inst_c0 = cinfo->stack_align_amount;
1107                 arg->next = call->out_args;
1108                 call->out_args = arg;
1109         }
1110 #endif 
1111
1112         return call;
1113 }
1114
1115 /*
1116  * Allow tracing to work with this interface (with an optional argument)
1117  */
1118 void*
1119 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1120 {
1121         guchar *code = p;
1122
1123 #if __APPLE__
1124         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1125 #endif
1126
1127         /* if some args are passed in registers, we need to save them here */
1128         x86_push_reg (code, X86_EBP);
1129
1130         if (cfg->compile_aot) {
1131                 x86_push_imm (code, cfg->method);
1132                 x86_mov_reg_imm (code, X86_EAX, func);
1133                 x86_call_reg (code, X86_EAX);
1134         } else {
1135                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1136                 x86_push_imm (code, cfg->method);
1137                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1138                 x86_call_code (code, 0);
1139         }
1140 #if __APPLE__
1141         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 16);
1142 #else
1143         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1144 #endif
1145
1146         return code;
1147 }
1148
/*
 * How mono_arch_instrument_epilog preserves the method's return value
 * around its call to the instrumentation function.
 */
1149 enum {
        /* Nothing is saved or restored */
1150         SAVE_NONE,
        /* Nothing is saved; with arguments enabled the word at EBP+8 is passed to the callee */
1151         SAVE_STRUCT,
        /* EAX is pushed before the call and popped afterwards */
1152         SAVE_EAX,
        /* EDX and EAX are pushed before the call and popped afterwards */
1153         SAVE_EAX_EDX,
        /* The fp return value is stored into 8 bytes of stack and reloaded afterwards */
1154         SAVE_FP
1155 };
1156
1157 void*
1158 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1159 {
1160         guchar *code = p;
1161         int arg_size = 0, save_mode = SAVE_NONE;
1162         MonoMethod *method = cfg->method;
1163         
1164         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1165         case MONO_TYPE_VOID:
1166                 /* special case string .ctor icall */
1167                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1168                         save_mode = SAVE_EAX;
1169                 else
1170                         save_mode = SAVE_NONE;
1171                 break;
1172         case MONO_TYPE_I8:
1173         case MONO_TYPE_U8:
1174                 save_mode = SAVE_EAX_EDX;
1175                 break;
1176         case MONO_TYPE_R4:
1177         case MONO_TYPE_R8:
1178                 save_mode = SAVE_FP;
1179                 break;
1180         case MONO_TYPE_GENERICINST:
1181                 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
1182                         save_mode = SAVE_EAX;
1183                         break;
1184                 }
1185                 /* Fall through */
1186         case MONO_TYPE_VALUETYPE:
1187                 save_mode = SAVE_STRUCT;
1188                 break;
1189         default:
1190                 save_mode = SAVE_EAX;
1191                 break;
1192         }
1193
1194         switch (save_mode) {
1195         case SAVE_EAX_EDX:
1196                 x86_push_reg (code, X86_EDX);
1197                 x86_push_reg (code, X86_EAX);
1198                 if (enable_arguments) {
1199                         x86_push_reg (code, X86_EDX);
1200                         x86_push_reg (code, X86_EAX);
1201                         arg_size = 8;
1202                 }
1203                 break;
1204         case SAVE_EAX:
1205                 x86_push_reg (code, X86_EAX);
1206                 if (enable_arguments) {
1207                         x86_push_reg (code, X86_EAX);
1208                         arg_size = 4;
1209                 }
1210                 break;
1211         case SAVE_FP:
1212                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1213                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1214                 if (enable_arguments) {
1215                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1216                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1217                         arg_size = 8;
1218                 }
1219                 break;
1220         case SAVE_STRUCT:
1221                 if (enable_arguments) {
1222                         x86_push_membase (code, X86_EBP, 8);
1223                         arg_size = 4;
1224                 }
1225                 break;
1226         case SAVE_NONE:
1227         default:
1228                 break;
1229         }
1230
1231         if (cfg->compile_aot) {
1232                 x86_push_imm (code, method);
1233                 x86_mov_reg_imm (code, X86_EAX, func);
1234                 x86_call_reg (code, X86_EAX);
1235         } else {
1236                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1237                 x86_push_imm (code, method);
1238                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1239                 x86_call_code (code, 0);
1240         }
1241         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1242
1243         switch (save_mode) {
1244         case SAVE_EAX_EDX:
1245                 x86_pop_reg (code, X86_EAX);
1246                 x86_pop_reg (code, X86_EDX);
1247                 break;
1248         case SAVE_EAX:
1249                 x86_pop_reg (code, X86_EAX);
1250                 break;
1251         case SAVE_FP:
1252                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1253                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1254                 break;
1255         case SAVE_NONE:
1256         default:
1257                 break;
1258         }
1259
1260         return code;
1261 }
1262
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch for ins. When MONO_INST_BRLABEL is set the
 * target is the label ins->inst_i0, otherwise the basic block
 * ins->inst_true_bb. If the target's recorded offset (inst_c0 resp.
 * native_offset) is non-zero, the branch is emitted directly to that
 * offset inside cfg->native_code. Otherwise a patch entry is registered
 * and a branch with a zero target is emitted; the 8 bit form is used only
 * when MONO_OPT_BRANCH is enabled and the distance computed from cpos fits
 * in an imm8.
 *
 * The expansion refers to cfg, code and cpos from the surrounding scope.
 * It is a bare if/else statement, not wrapped in do { } while (0).
 */
1263 #define EMIT_COND_BRANCH(ins,cond,sign) \
1264 if (ins->flags & MONO_INST_BRLABEL) { \
1265         if (ins->inst_i0->inst_c0) { \
1266                 x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
1267         } else { \
1268                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
1269                 if ((cfg->opt & MONO_OPT_BRANCH) && \
1270                     x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
1271                         x86_branch8 (code, cond, 0, sign); \
1272                 else \
1273                         x86_branch32 (code, cond, 0, sign); \
1274         } \
1275 } else { \
1276         if (ins->inst_true_bb->native_offset) { \
1277                 x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
1278         } else { \
1279                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
1280                 if ((cfg->opt & MONO_OPT_BRANCH) && \
1281                     x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
1282                         x86_branch8 (code, cond, 0, sign); \
1283                 else \
1284                         x86_branch32 (code, cond, 0, sign); \
1285         } \
1286 }
1287
1288 /*  
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit a conditional branch that is taken when cond holds and leads to
 * the code raising the exception exc_name. If
 * mono_branch_optimize_exception_target () returns an instruction, the
 * branch goes directly to it through EMIT_COND_BRANCH; otherwise a
 * MONO_PATCH_INFO_EXC patch is registered and a 32 bit branch with a zero
 * target is emitted.
 *
 * The expansion refers to cfg, bb and code (and cpos, through
 * EMIT_COND_BRANCH) from the surrounding scope.
 * NOTE: the definition already ends in a ';' after "while (0)".
1291  */
1292 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
1293         do {                                                        \
1294                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
1295                 if (tins == NULL) {                                                                             \
1296                         mono_add_patch_info (cfg, code - cfg->native_code,   \
1297                                         MONO_PATCH_INFO_EXC, exc_name);  \
1298                         x86_branch32 (code, cond, 0, signed);               \
1299                 } else {        \
1300                         EMIT_COND_BRANCH (tins, cond, signed);  \
1301                 }                       \
1302         } while (0); 
1303
/*
 * EMIT_FPCOMPARE:
 *
 *   Emit x86_fcompp followed by x86_fnstsw at code (presumably: compare and
 * pop the two topmost fp stack entries, then copy the fp status word so it
 * can be tested - confirm against x86-codegen.h).
 * NOTE: the definition already ends in a ';' after "while (0)".
 */
1304 #define EMIT_FPCOMPARE(code) do { \
1305         x86_fcompp (code); \
1306         x86_fnstsw (code); \
1307 } while (0); 
1308
1309
/*
 * emit_call:
 *
 *   Emit a call instruction at code whose target is resolved later: a patch
 * entry of type patch_type carrying data is registered for the current
 * offset inside cfg->native_code, then a call with a zero target is
 * emitted. Returns the address following the emitted call.
 */
1310 static guint8*
1311 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1312 {
        /* The patch has to be recorded before x86_call_code advances code */
1313         mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1314         x86_call_code (code, 0);
1315
1316         return code;
1317 }
1318
/*
 * INST_IGNORES_CFLAGS:
 *
 *   Evaluates to true unless opcode is one of the add-with-carry or
 * subtract-with-borrow opcodes listed below (register and immediate forms).
 * Used to decide whether a preceding instruction may clobber the carry flag.
 */
1319 #define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
1320
1321 /*
1322  * peephole_pass_1:
1323  *
1324  *   Perform peephole opts which should/can be performed before local regalloc
1325  */
1326 static void
1327 peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
1328 {
1329         MonoInst *ins, *last_ins = NULL;
1330         ins = bb->code;
1331
1332         while (ins) {
1333                 switch (ins->opcode) {
1334                 case OP_IADD_IMM:
1335                 case OP_ADD_IMM:
1336                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1337                                 /* 
1338                                  * X86_LEA is like ADD, but doesn't have the
1339                                  * sreg1==dreg restriction.
1340                                  */
1341                                 ins->opcode = OP_X86_LEA_MEMBASE;
1342                                 ins->inst_basereg = ins->sreg1;
1343                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1344                                 ins->opcode = OP_X86_INC_REG;
1345                         break;
1346                 case OP_SUB_IMM:
1347                 case OP_ISUB_IMM:
1348                         if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
1349                                 ins->opcode = OP_X86_LEA_MEMBASE;
1350                                 ins->inst_basereg = ins->sreg1;
1351                                 ins->inst_imm = -ins->inst_imm;
1352                         } else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1353                                 ins->opcode = OP_X86_DEC_REG;
1354                         break;
1355                 case OP_COMPARE_IMM:
1356                 case OP_ICOMPARE_IMM:
1357                         /* OP_COMPARE_IMM (reg, 0) 
1358                          * --> 
1359                          * OP_X86_TEST_NULL (reg) 
1360                          */
1361                         if (!ins->inst_imm)
1362                                 ins->opcode = OP_X86_TEST_NULL;
1363                         break;
1364                 case OP_X86_COMPARE_MEMBASE_IMM:
1365                         /* 
1366                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1367                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1368                          * -->
1369                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1370                          * OP_COMPARE_IMM reg, imm
1371                          *
1372                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1373                          */
1374                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1375                             ins->inst_basereg == last_ins->inst_destbasereg &&
1376                             ins->inst_offset == last_ins->inst_offset) {
1377                                         ins->opcode = OP_COMPARE_IMM;
1378                                         ins->sreg1 = last_ins->sreg1;
1379
1380                                         /* check if we can remove cmp reg,0 with test null */
1381                                         if (!ins->inst_imm)
1382                                                 ins->opcode = OP_X86_TEST_NULL;
1383                                 }
1384
1385                         break;
1386                 case OP_LOAD_MEMBASE:
1387                 case OP_LOADI4_MEMBASE:
1388                         /* 
1389                          * Note: if reg1 = reg2 the load op is removed
1390                          *
1391                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1392                          * OP_LOAD_MEMBASE offset(basereg), reg2
1393                          * -->
1394                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1395                          * OP_MOVE reg1, reg2
1396                          */
1397                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1398                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1399                             ins->inst_basereg == last_ins->inst_destbasereg &&
1400                             ins->inst_offset == last_ins->inst_offset) {
1401                                 if (ins->dreg == last_ins->sreg1) {
1402                                         last_ins->next = ins->next;                             
1403                                         ins = ins->next;                                
1404                                         continue;
1405                                 } else {
1406                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1407                                         ins->opcode = OP_MOVE;
1408                                         ins->sreg1 = last_ins->sreg1;
1409                                 }
1410
1411                         /* 
1412                          * Note: reg1 must be different from the basereg in the second load
1413                          * Note: if reg1 = reg2 is equal then second load is removed
1414                          *
1415                          * OP_LOAD_MEMBASE offset(basereg), reg1
1416                          * OP_LOAD_MEMBASE offset(basereg), reg2
1417                          * -->
1418                          * OP_LOAD_MEMBASE offset(basereg), reg1
1419                          * OP_MOVE reg1, reg2
1420                          */
1421                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1422                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1423                               ins->inst_basereg != last_ins->dreg &&
1424                               ins->inst_basereg == last_ins->inst_basereg &&
1425                               ins->inst_offset == last_ins->inst_offset) {
1426
1427                                 if (ins->dreg == last_ins->dreg) {
1428                                         last_ins->next = ins->next;                             
1429                                         ins = ins->next;                                
1430                                         continue;
1431                                 } else {
1432                                         ins->opcode = OP_MOVE;
1433                                         ins->sreg1 = last_ins->dreg;
1434                                 }
1435
1436                                 //g_assert_not_reached ();
1437
1438 #if 0
1439                         /* 
1440                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1441                          * OP_LOAD_MEMBASE offset(basereg), reg
1442                          * -->
1443                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1444                          * OP_ICONST reg, imm
1445                          */
1446                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1447                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1448                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1449                                    ins->inst_offset == last_ins->inst_offset) {
1450                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1451                                 ins->opcode = OP_ICONST;
1452                                 ins->inst_c0 = last_ins->inst_imm;
1453                                 g_assert_not_reached (); // check this rule
1454 #endif
1455                         }
1456                         break;
1457                 case OP_LOADU1_MEMBASE:
1458                 case OP_LOADI1_MEMBASE:
1459                         /* 
1460                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1461                          * OP_LOAD_MEMBASE offset(basereg), reg2
1462                          * -->
1463                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1464                          * CONV_I2/U2 reg1, reg2
1465                          */
1466                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1467                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1468                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1469                                         ins->inst_offset == last_ins->inst_offset) {
1470                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1471                                 ins->sreg1 = last_ins->sreg1;
1472                         }
1473                         break;
1474                 case OP_LOADU2_MEMBASE:
1475                 case OP_LOADI2_MEMBASE:
1476                         /* 
1477                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1478                          * OP_LOAD_MEMBASE offset(basereg), reg2
1479                          * -->
1480                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1481                          * CONV_I2/U2 reg1, reg2
1482                          */
1483                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1484                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1485                                         ins->inst_offset == last_ins->inst_offset) {
1486                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1487                                 ins->sreg1 = last_ins->sreg1;
1488                         }
1489                         break;
1490                 case CEE_CONV_I4:
1491                 case CEE_CONV_U4:
1492                 case OP_ICONV_TO_I4:
1493                 case OP_MOVE:
1494                         /*
1495                          * Removes:
1496                          *
1497                          * OP_MOVE reg, reg 
1498                          */
1499                         if (ins->dreg == ins->sreg1) {
1500                                 if (last_ins)
1501                                         last_ins->next = ins->next;                             
1502                                 ins = ins->next;
1503                                 continue;
1504                         }
1505                         /* 
1506                          * Removes:
1507                          *
1508                          * OP_MOVE sreg, dreg 
1509                          * OP_MOVE dreg, sreg
1510                          */
1511                         if (last_ins && last_ins->opcode == OP_MOVE &&
1512                             ins->sreg1 == last_ins->dreg &&
1513                             ins->dreg == last_ins->sreg1) {
1514                                 last_ins->next = ins->next;                             
1515                                 ins = ins->next;                                
1516                                 continue;
1517                         }
1518                         break;
1519                         
1520                 case OP_X86_PUSH_MEMBASE:
1521                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1522                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1523                             ins->inst_basereg == last_ins->inst_destbasereg &&
1524                             ins->inst_offset == last_ins->inst_offset) {
1525                                     ins->opcode = OP_X86_PUSH;
1526                                     ins->sreg1 = last_ins->sreg1;
1527                         }
1528                         break;
1529                 }
1530                 last_ins = ins;
1531                 ins = ins->next;
1532         }
1533         bb->last_ins = last_ins;
1534 }
1535
1536 static void
1537 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1538 {
1539         MonoInst *ins, *last_ins = NULL;
1540         ins = bb->code;
1541
1542         while (ins) {
1543
1544                 switch (ins->opcode) {
1545                 case OP_ICONST:
1546                         /* reg = 0 -> XOR (reg, reg) */
1547                         /* XOR sets cflags on x86, so we cant do it always */
1548                         if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
1549                                 MonoInst *ins2;
1550
1551                                 ins->opcode = OP_IXOR;
1552                                 ins->sreg1 = ins->dreg;
1553                                 ins->sreg2 = ins->dreg;
1554
1555                                 /* 
1556                                  * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG 
1557                                  * since it takes 3 bytes instead of 7.
1558                                  */
1559                                 for (ins2 = ins->next; ins2; ins2 = ins2->next) {
1560                                         if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1561                                                 ins2->opcode = OP_STORE_MEMBASE_REG;
1562                                                 ins2->sreg1 = ins->dreg;
1563                                         }
1564                                         else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
1565                                                 ins2->opcode = OP_STOREI4_MEMBASE_REG;
1566                                                 ins2->sreg1 = ins->dreg;
1567                                         }
1568                                         else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
1569                                                 /* Continue iteration */
1570                                         }
1571                                         else
1572                                                 break;
1573                                 }
1574                         }
1575                         break;
1576                 case OP_IADD_IMM:
1577                 case OP_ADD_IMM:
1578                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1579                                 ins->opcode = OP_X86_INC_REG;
1580                         break;
1581                 case OP_ISUB_IMM:
1582                 case OP_SUB_IMM:
1583                         if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
1584                                 ins->opcode = OP_X86_DEC_REG;
1585                         break;
1586                 case OP_X86_COMPARE_MEMBASE_IMM:
1587                         /* 
1588                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1589                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1590                          * -->
1591                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1592                          * OP_COMPARE_IMM reg, imm
1593                          *
1594                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1595                          */
1596                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1597                             ins->inst_basereg == last_ins->inst_destbasereg &&
1598                             ins->inst_offset == last_ins->inst_offset) {
1599                                         ins->opcode = OP_COMPARE_IMM;
1600                                         ins->sreg1 = last_ins->sreg1;
1601
1602                                         /* check if we can remove cmp reg,0 with test null */
1603                                         if (!ins->inst_imm)
1604                                                 ins->opcode = OP_X86_TEST_NULL;
1605                                 }
1606
1607                         break;
1608                 case OP_LOAD_MEMBASE:
1609                 case OP_LOADI4_MEMBASE:
1610                         /* 
1611                          * Note: if reg1 = reg2 the load op is removed
1612                          *
1613                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1614                          * OP_LOAD_MEMBASE offset(basereg), reg2
1615                          * -->
1616                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1617                          * OP_MOVE reg1, reg2
1618                          */
1619                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1620                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1621                             ins->inst_basereg == last_ins->inst_destbasereg &&
1622                             ins->inst_offset == last_ins->inst_offset) {
1623                                 if (ins->dreg == last_ins->sreg1) {
1624                                         last_ins->next = ins->next;                             
1625                                         ins = ins->next;                                
1626                                         continue;
1627                                 } else {
1628                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1629                                         ins->opcode = OP_MOVE;
1630                                         ins->sreg1 = last_ins->sreg1;
1631                                 }
1632
1633                         /* 
1634                          * Note: reg1 must be different from the basereg in the second load
1635                          * Note: if reg1 = reg2 is equal then second load is removed
1636                          *
1637                          * OP_LOAD_MEMBASE offset(basereg), reg1
1638                          * OP_LOAD_MEMBASE offset(basereg), reg2
1639                          * -->
1640                          * OP_LOAD_MEMBASE offset(basereg), reg1
1641                          * OP_MOVE reg1, reg2
1642                          */
1643                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1644                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1645                               ins->inst_basereg != last_ins->dreg &&
1646                               ins->inst_basereg == last_ins->inst_basereg &&
1647                               ins->inst_offset == last_ins->inst_offset) {
1648
1649                                 if (ins->dreg == last_ins->dreg) {
1650                                         last_ins->next = ins->next;                             
1651                                         ins = ins->next;                                
1652                                         continue;
1653                                 } else {
1654                                         ins->opcode = OP_MOVE;
1655                                         ins->sreg1 = last_ins->dreg;
1656                                 }
1657
1658                                 //g_assert_not_reached ();
1659
1660 #if 0
1661                         /* 
1662                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1663                          * OP_LOAD_MEMBASE offset(basereg), reg
1664                          * -->
1665                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1666                          * OP_ICONST reg, imm
1667                          */
1668                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1669                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1670                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1671                                    ins->inst_offset == last_ins->inst_offset) {
1672                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1673                                 ins->opcode = OP_ICONST;
1674                                 ins->inst_c0 = last_ins->inst_imm;
1675                                 g_assert_not_reached (); // check this rule
1676 #endif
1677                         }
1678                         break;
1679                 case OP_LOADU1_MEMBASE:
1680                 case OP_LOADI1_MEMBASE:
1681                         /* 
1682                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1683                          * OP_LOAD_MEMBASE offset(basereg), reg2
1684                          * -->
1685                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1686                          * CONV_I2/U2 reg1, reg2
1687                          */
1688                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1689                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1690                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1691                                         ins->inst_offset == last_ins->inst_offset) {
1692                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1693                                 ins->sreg1 = last_ins->sreg1;
1694                         }
1695                         break;
1696                 case OP_LOADU2_MEMBASE:
1697                 case OP_LOADI2_MEMBASE:
1698                         /* 
1699                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1700                          * OP_LOAD_MEMBASE offset(basereg), reg2
1701                          * -->
1702                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1703                          * CONV_I2/U2 reg1, reg2
1704                          */
1705                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1706                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1707                                         ins->inst_offset == last_ins->inst_offset) {
1708                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1709                                 ins->sreg1 = last_ins->sreg1;
1710                         }
1711                         break;
1712                 case CEE_CONV_I4:
1713                 case CEE_CONV_U4:
1714                 case OP_ICONV_TO_I4:
1715                 case OP_MOVE:
1716                         /*
1717                          * Removes:
1718                          *
1719                          * OP_MOVE reg, reg 
1720                          */
1721                         if (ins->dreg == ins->sreg1) {
1722                                 if (last_ins)
1723                                         last_ins->next = ins->next;                             
1724                                 ins = ins->next;
1725                                 continue;
1726                         }
1727                         /* 
1728                          * Removes:
1729                          *
1730                          * OP_MOVE sreg, dreg 
1731                          * OP_MOVE dreg, sreg
1732                          */
1733                         if (last_ins && last_ins->opcode == OP_MOVE &&
1734                             ins->sreg1 == last_ins->dreg &&
1735                             ins->dreg == last_ins->sreg1) {
1736                                 last_ins->next = ins->next;                             
1737                                 ins = ins->next;                                
1738                                 continue;
1739                         }
1740                         break;
1741                 case OP_X86_PUSH_MEMBASE:
1742                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1743                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1744                             ins->inst_basereg == last_ins->inst_destbasereg &&
1745                             ins->inst_offset == last_ins->inst_offset) {
1746                                     ins->opcode = OP_X86_PUSH;
1747                                     ins->sreg1 = last_ins->sreg1;
1748                         }
1749                         break;
1750                 }
1751                 last_ins = ins;
1752                 ins = ins->next;
1753         }
1754         bb->last_ins = last_ins;
1755 }
1756
1757 static const int 
1758 branch_cc_table [] = {
1759         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1760         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1761         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1762 };
1763
1764 /* Maps CMP_... constants to X86_CC_... constants */
1765 static const int
1766 cc_table [] = {
1767         X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
1768         X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
1769 };
1770
1771 static const int
1772 cc_signed_table [] = {
1773         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1774         FALSE, FALSE, FALSE, FALSE
1775 };
1776
1777 void
1778 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1779 {
1780         if (cfg->opt & MONO_OPT_PEEPHOLE)
1781                 peephole_pass_1 (cfg, bb);
1782
1783         mono_local_regalloc (cfg, bb);
1784 }
1785
1786 static unsigned char*
1787 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1788 {
1789 #define XMM_TEMP_REG 0
1790         if (cfg->opt & MONO_OPT_SSE2 && size < 8) {
1791                 /* optimize by assigning a local var for this use so we avoid
1792                  * the stack manipulations */
1793                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1794                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1795                 x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
1796                 x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
1797                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1798                 if (size == 1)
1799                         x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1800                 else if (size == 2)
1801                         x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1802                 return code;
1803         }
1804         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1805         x86_fnstcw_membase(code, X86_ESP, 0);
1806         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1807         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1808         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1809         x86_fldcw_membase (code, X86_ESP, 2);
1810         if (size == 8) {
1811                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1812                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1813                 x86_pop_reg (code, dreg);
1814                 /* FIXME: need the high register 
1815                  * x86_pop_reg (code, dreg_high);
1816                  */
1817         } else {
1818                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1819                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1820                 x86_pop_reg (code, dreg);
1821         }
1822         x86_fldcw_membase (code, X86_ESP, 0);
1823         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1824
1825         if (size == 1)
1826                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1827         else if (size == 2)
1828                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1829         return code;
1830 }
1831
1832 static unsigned char*
1833 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1834 {
1835         int sreg = tree->sreg1;
1836         int need_touch = FALSE;
1837
1838 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1839         need_touch = TRUE;
1840 #endif
1841
1842         if (need_touch) {
1843                 guint8* br[5];
1844
1845                 /*
1846                  * Under Windows:
1847                  * If requested stack size is larger than one page,
1848                  * perform stack-touch operation
1849                  */
1850                 /*
1851                  * Generate stack probe code.
1852                  * Under Windows, it is necessary to allocate one page at a time,
1853                  * "touching" stack after each successful sub-allocation. This is
1854                  * because of the way stack growth is implemented - there is a
1855                  * guard page before the lowest stack page that is currently commited.
1856                  * Stack normally grows sequentially so OS traps access to the
1857                  * guard page and commits more pages when needed.
1858                  */
1859                 x86_test_reg_imm (code, sreg, ~0xFFF);
1860                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1861
1862                 br[2] = code; /* loop */
1863                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1864                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1865
1866                 /* 
1867                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
1868                  * that follows only initializes the last part of the area.
1869                  */
1870                 /* Same as the init code below with size==0x1000 */
1871                 if (tree->flags & MONO_INST_INIT) {
1872                         x86_push_reg (code, X86_EAX);
1873                         x86_push_reg (code, X86_ECX);
1874                         x86_push_reg (code, X86_EDI);
1875                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
1876                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
1877                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
1878                         x86_cld (code);
1879                         x86_prefix (code, X86_REP_PREFIX);
1880                         x86_stosl (code);
1881                         x86_pop_reg (code, X86_EDI);
1882                         x86_pop_reg (code, X86_ECX);
1883                         x86_pop_reg (code, X86_EAX);
1884                 }
1885
1886                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1887                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1888                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1889                 x86_patch (br[3], br[2]);
1890                 x86_test_reg_reg (code, sreg, sreg);
1891                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1892                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1893
1894                 br[1] = code; x86_jump8 (code, 0);
1895
1896                 x86_patch (br[0], code);
1897                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1898                 x86_patch (br[1], code);
1899                 x86_patch (br[4], code);
1900         }
1901         else
1902                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1903
1904         if (tree->flags & MONO_INST_INIT) {
1905                 int offset = 0;
1906                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1907                         x86_push_reg (code, X86_EAX);
1908                         offset += 4;
1909                 }
1910                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1911                         x86_push_reg (code, X86_ECX);
1912                         offset += 4;
1913                 }
1914                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1915                         x86_push_reg (code, X86_EDI);
1916                         offset += 4;
1917                 }
1918                 
1919                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1920                 if (sreg != X86_ECX)
1921                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1922                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1923                                 
1924                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1925                 x86_cld (code);
1926                 x86_prefix (code, X86_REP_PREFIX);
1927                 x86_stosl (code);
1928                 
1929                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1930                         x86_pop_reg (code, X86_EDI);
1931                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1932                         x86_pop_reg (code, X86_ECX);
1933                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1934                         x86_pop_reg (code, X86_EAX);
1935         }
1936         return code;
1937 }
1938
1939
1940 static guint8*
1941 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1942 {
1943         CallInfo *cinfo;
1944         int quad;
1945
1946         /* Move return value to the target register */
1947         switch (ins->opcode) {
1948         case CEE_CALL:
1949         case OP_CALL_REG:
1950         case OP_CALL_MEMBASE:
1951                 if (ins->dreg != X86_EAX)
1952                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1953                 break;
1954         case OP_VCALL:
1955         case OP_VCALL_REG:
1956         case OP_VCALL_MEMBASE:
1957                 cinfo = get_call_info (cfg, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
1958                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1959                         /* Pop the destination address from the stack */
1960                         x86_pop_reg (code, X86_ECX);
1961                         
1962                         for (quad = 0; quad < 2; quad ++) {
1963                                 switch (cinfo->ret.pair_storage [quad]) {
1964                                 case ArgInIReg:
1965                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1966                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1967                                         break;
1968                                 case ArgNone:
1969                                         break;
1970                                 default:
1971                                         g_assert_not_reached ();
1972                                 }
1973                         }
1974                 }
1975         default:
1976                 break;
1977         }
1978
1979         return code;
1980 }
1981
1982 /*
1983  * emit_tls_get:
1984  * @code: buffer to store code to
1985  * @dreg: hard register where to place the result
1986  * @tls_offset: offset info
1987  *
1988  * emit_tls_get emits in @code the native code that puts in the dreg register
1989  * the item in the thread local storage identified by tls_offset.
1990  *
1991  * Returns: a pointer to the end of the stored code
1992  */
1993 static guint8*
1994 emit_tls_get (guint8* code, int dreg, int tls_offset)
1995 {
1996 #ifdef PLATFORM_WIN32
1997         /* 
1998          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1999          * Journal and/or a disassembly of the TlsGet () function.
2000          */
2001         g_assert (tls_offset < 64);
2002         x86_prefix (code, X86_FS_PREFIX);
2003         x86_mov_reg_mem (code, dreg, 0x18, 4);
2004         /* Dunno what this does but TlsGetValue () contains it */
2005         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
2006         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
2007 #else
2008         if (optimize_for_xen) {
2009                 x86_prefix (code, X86_GS_PREFIX);
2010                 x86_mov_reg_mem (code, dreg, 0, 4);
2011                 x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
2012         } else {
2013                 x86_prefix (code, X86_GS_PREFIX);
2014                 x86_mov_reg_mem (code, dreg, tls_offset, 4);
2015         }
2016 #endif
2017         return code;
2018 }
2019
2020 /*
2021  * emit_load_volatile_arguments:
2022  *
2023  *  Load volatile arguments from the stack to the original input registers.
2024  * Required before a tail call.
2025  */
2026 static guint8*
2027 emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
2028 {
2029         MonoMethod *method = cfg->method;
2030         MonoMethodSignature *sig;
2031         MonoInst *inst;
2032         CallInfo *cinfo;
2033         guint32 i;
2034
2035         /* FIXME: Generate intermediate code instead */
2036
2037         sig = mono_method_signature (method);
2038
2039         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
2040         
2041         /* This is the opposite of the code in emit_prolog */
2042
2043         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
2044                 ArgInfo *ainfo = cinfo->args + i;
2045                 MonoType *arg_type;
2046                 inst = cfg->args [i];
2047
2048                 if (sig->hasthis && (i == 0))
2049                         arg_type = &mono_defaults.object_class->byval_arg;
2050                 else
2051                         arg_type = sig->params [i - sig->hasthis];
2052
2053                 /*
2054                  * On x86, the arguments are either in their original stack locations, or in
2055                  * global regs.
2056                  */
2057                 if (inst->opcode == OP_REGVAR) {
2058                         g_assert (ainfo->storage == ArgOnStack);
2059                         
2060                         x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
2061                 }
2062         }
2063
2064         return code;
2065 }
2066
/*
 * REAL_PRINT_REG:
 * Debugging aid. Expands to emitter calls (it uses the local `code' pointer)
 * producing code that calls printf with the format text " %d %p\n", the
 * register number and the register's value. EAX, EDX and ECX are saved
 * before the call and restored afterwards; the three printf arguments are
 * dropped from the stack by adding 3*4 to ESP.
 * The expansion is a plain statement sequence ending in a semicolon, not a
 * single statement.
 */
#define REAL_PRINT_REG(text,reg) \
	mono_assert (reg >= 0); \
	x86_push_reg (code, X86_EAX); \
	x86_push_reg (code, X86_EDX); \
	x86_push_reg (code, X86_ECX); \
	x86_push_reg (code, reg); \
	x86_push_imm (code, reg); \
	x86_push_imm (code, text " %d %p\n"); \
	x86_mov_reg_imm (code, X86_EAX, printf); \
	x86_call_reg (code, X86_EAX); \
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
	x86_pop_reg (code, X86_ECX); \
	x86_pop_reg (code, X86_EDX); \
	x86_pop_reg (code, X86_EAX);

/* Alignment applied to loop starts; benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
/* True for a basic block that has loop_body_start set and a nonzero nesting */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2085
2086 void
2087 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2088 {
2089         MonoInst *ins;
2090         MonoCallInst *call;
2091         guint offset;
2092         guint8 *code = cfg->native_code + cfg->code_len;
2093         MonoInst *last_ins = NULL;
2094         guint last_offset = 0;
2095         int max_len, cpos;
2096
2097         if (cfg->opt & MONO_OPT_PEEPHOLE)
2098                 peephole_pass (cfg, bb);
2099
2100         if (cfg->opt & MONO_OPT_LOOP) {
2101                 int pad, align = LOOP_ALIGNMENT;
2102                 /* set alignment depending on cpu */
2103                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2104                         pad = align - pad;
2105                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2106                         x86_padding (code, pad);
2107                         cfg->code_len += pad;
2108                         bb->native_offset = cfg->code_len;
2109                 }
2110         }
2111
2112         if (cfg->verbose_level > 2)
2113                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2114
2115         cpos = bb->max_offset;
2116
2117         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2118                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2119                 g_assert (!cfg->compile_aot);
2120                 cpos += 6;
2121
2122                 cov->data [bb->dfn].cil_code = bb->cil_code;
2123                 /* this is not thread save, but good enough */
2124                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2125         }
2126
2127         offset = code - cfg->native_code;
2128
2129         mono_debug_open_block (cfg, bb, offset);
2130
2131         ins = bb->code;
2132         while (ins) {
2133                 offset = code - cfg->native_code;
2134
2135                 max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
2136
2137                 if (offset > (cfg->code_size - max_len - 16)) {
2138                         cfg->code_size *= 2;
2139                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2140                         code = cfg->native_code + offset;
2141                         mono_jit_stats.code_reallocs++;
2142                 }
2143
2144                 mono_debug_record_line_number (cfg, ins, offset);
2145
2146                 switch (ins->opcode) {
2147                 case OP_BIGMUL:
2148                         x86_mul_reg (code, ins->sreg2, TRUE);
2149                         break;
2150                 case OP_BIGMUL_UN:
2151                         x86_mul_reg (code, ins->sreg2, FALSE);
2152                         break;
2153                 case OP_X86_SETEQ_MEMBASE:
2154                 case OP_X86_SETNE_MEMBASE:
2155                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2156                                          ins->inst_basereg, ins->inst_offset, TRUE);
2157                         break;
2158                 case OP_STOREI1_MEMBASE_IMM:
2159                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2160                         break;
2161                 case OP_STOREI2_MEMBASE_IMM:
2162                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2163                         break;
2164                 case OP_STORE_MEMBASE_IMM:
2165                 case OP_STOREI4_MEMBASE_IMM:
2166                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2167                         break;
2168                 case OP_STOREI1_MEMBASE_REG:
2169                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2170                         break;
2171                 case OP_STOREI2_MEMBASE_REG:
2172                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2173                         break;
2174                 case OP_STORE_MEMBASE_REG:
2175                 case OP_STOREI4_MEMBASE_REG:
2176                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2177                         break;
2178                 case CEE_LDIND_I:
2179                 case CEE_LDIND_I4:
2180                 case CEE_LDIND_U4:
2181                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2182                         break;
2183                 case OP_LOADU4_MEM:
2184                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2185                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2186                         break;
2187                 case OP_LOAD_MEMBASE:
2188                 case OP_LOADI4_MEMBASE:
2189                 case OP_LOADU4_MEMBASE:
2190                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2191                         break;
2192                 case OP_LOADU1_MEMBASE:
2193                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2194                         break;
2195                 case OP_LOADI1_MEMBASE:
2196                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2197                         break;
2198                 case OP_LOADU2_MEMBASE:
2199                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2200                         break;
2201                 case OP_LOADI2_MEMBASE:
2202                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2203                         break;
2204                 case CEE_CONV_I1:
2205                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2206                         break;
2207                 case CEE_CONV_I2:
2208                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2209                         break;
2210                 case CEE_CONV_U1:
2211                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2212                         break;
2213                 case CEE_CONV_U2:
2214                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2215                         break;
2216                 case OP_COMPARE:
2217                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2218                         break;
2219                 case OP_COMPARE_IMM:
2220                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2221                         break;
2222                 case OP_X86_COMPARE_MEMBASE_REG:
2223                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2224                         break;
2225                 case OP_X86_COMPARE_MEMBASE_IMM:
2226                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2227                         break;
2228                 case OP_X86_COMPARE_MEMBASE8_IMM:
2229                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2230                         break;
2231                 case OP_X86_COMPARE_REG_MEMBASE:
2232                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2233                         break;
2234                 case OP_X86_COMPARE_MEM_IMM:
2235                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2236                         break;
2237                 case OP_X86_TEST_NULL:
2238                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2239                         break;
2240                 case OP_X86_ADD_MEMBASE_IMM:
2241                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2242                         break;
2243                 case OP_X86_ADD_MEMBASE:
2244                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2245                         break;
2246                 case OP_X86_SUB_MEMBASE_IMM:
2247                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2248                         break;
2249                 case OP_X86_SUB_MEMBASE:
2250                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2251                         break;
2252                 case OP_X86_AND_MEMBASE_IMM:
2253                         x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2254                         break;
2255                 case OP_X86_OR_MEMBASE_IMM:
2256                         x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2257                         break;
2258                 case OP_X86_XOR_MEMBASE_IMM:
2259                         x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2260                         break;
2261                 case OP_X86_INC_MEMBASE:
2262                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2263                         break;
2264                 case OP_X86_INC_REG:
2265                         x86_inc_reg (code, ins->dreg);
2266                         break;
2267                 case OP_X86_DEC_MEMBASE:
2268                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2269                         break;
2270                 case OP_X86_DEC_REG:
2271                         x86_dec_reg (code, ins->dreg);
2272                         break;
2273                 case OP_X86_MUL_MEMBASE:
2274                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2275                         break;
2276                 case OP_BREAK:
2277                         x86_breakpoint (code);
2278                         break;
2279                 case OP_ADDCC:
2280                 case CEE_ADD:
2281                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2282                         break;
2283                 case OP_ADC:
2284                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2285                         break;
2286                 case OP_ADDCC_IMM:
2287                 case OP_ADD_IMM:
2288                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2289                         break;
2290                 case OP_ADC_IMM:
2291                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2292                         break;
2293                 case OP_SUBCC:
2294                 case CEE_SUB:
2295                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2296                         break;
2297                 case OP_SBB:
2298                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2299                         break;
2300                 case OP_SUBCC_IMM:
2301                 case OP_SUB_IMM:
2302                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2303                         break;
2304                 case OP_SBB_IMM:
2305                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2306                         break;
2307                 case CEE_AND:
2308                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2309                         break;
2310                 case OP_AND_IMM:
2311                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2312                         break;
2313                 case CEE_DIV:
2314                         x86_cdq (code);
2315                         x86_div_reg (code, ins->sreg2, TRUE);
2316                         break;
2317                 case CEE_DIV_UN:
2318                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2319                         x86_div_reg (code, ins->sreg2, FALSE);
2320                         break;
2321                 case OP_DIV_IMM:
2322                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2323                         x86_cdq (code);
2324                         x86_div_reg (code, ins->sreg2, TRUE);
2325                         break;
2326                 case CEE_REM:
2327                         x86_cdq (code);
2328                         x86_div_reg (code, ins->sreg2, TRUE);
2329                         break;
2330                 case CEE_REM_UN:
2331                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2332                         x86_div_reg (code, ins->sreg2, FALSE);
2333                         break;
2334                 case OP_REM_IMM:
2335                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2336                         x86_cdq (code);
2337                         x86_div_reg (code, ins->sreg2, TRUE);
2338                         break;
2339                 case CEE_OR:
2340                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2341                         break;
2342                 case OP_OR_IMM:
2343                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2344                         break;
2345                 case CEE_XOR:
2346                 case OP_IXOR:
2347                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2348                         break;
2349                 case OP_XOR_IMM:
2350                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2351                         break;
2352                 case CEE_SHL:
2353                         g_assert (ins->sreg2 == X86_ECX);
2354                         x86_shift_reg (code, X86_SHL, ins->dreg);
2355                         break;
2356                 case CEE_SHR:
2357                         g_assert (ins->sreg2 == X86_ECX);
2358                         x86_shift_reg (code, X86_SAR, ins->dreg);
2359                         break;
2360                 case OP_SHR_IMM:
2361                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2362                         break;
2363                 case OP_SHR_UN_IMM:
2364                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2365                         break;
2366                 case CEE_SHR_UN:
2367                         g_assert (ins->sreg2 == X86_ECX);
2368                         x86_shift_reg (code, X86_SHR, ins->dreg);
2369                         break;
2370                 case OP_SHL_IMM:
2371                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2372                         break;
2373                 case OP_LSHL: {
2374                         guint8 *jump_to_end;
2375
2376                         /* handle shifts below 32 bits */
2377                         x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2378                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2379
2380                         x86_test_reg_imm (code, X86_ECX, 32);
2381                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2382
2383                         /* handle shift over 32 bit */
2384                         x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2385                         x86_clear_reg (code, ins->sreg1);
2386                         
2387                         x86_patch (jump_to_end, code);
2388                         }
2389                         break;
2390                 case OP_LSHR: {
2391                         guint8 *jump_to_end;
2392
2393                         /* handle shifts below 32 bits */
2394                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2395                         x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2396
2397                         x86_test_reg_imm (code, X86_ECX, 32);
2398                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2399
2400                         /* handle shifts over 31 bits */
2401                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2402                         x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2403                         
2404                         x86_patch (jump_to_end, code);
2405                         }
2406                         break;
2407                 case OP_LSHR_UN: {
2408                         guint8 *jump_to_end;
2409
2410                         /* handle shifts below 32 bits */
2411                         x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2412                         x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2413
2414                         x86_test_reg_imm (code, X86_ECX, 32);
2415                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2416
2417                         /* handle shifts over 31 bits */
2418                         x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2419                         x86_clear_reg (code, ins->backend.reg3);
2420                         
2421                         x86_patch (jump_to_end, code);
2422                         }
2423                         break;
2424                 case OP_LSHL_IMM:
2425                         if (ins->inst_imm >= 32) {
2426                                 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2427                                 x86_clear_reg (code, ins->sreg1);
2428                                 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2429                         } else {
2430                                 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2431                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2432                         }
2433                         break;
2434                 case OP_LSHR_IMM:
2435                         if (ins->inst_imm >= 32) {
2436                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3,  4);
2437                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2438                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2439                         } else {
2440                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2441                                 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2442                         }
2443                         break;
2444                 case OP_LSHR_UN_IMM:
2445                         if (ins->inst_imm >= 32) {
2446                                 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2447                                 x86_clear_reg (code, ins->backend.reg3);
2448                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2449                         } else {
2450                                 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2451                                 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2452                         }
2453                         break;
2454                 case CEE_NOT:
2455                         x86_not_reg (code, ins->sreg1);
2456                         break;
2457                 case CEE_NEG:
2458                         x86_neg_reg (code, ins->sreg1);
2459                         break;
2460                 case OP_SEXT_I1:
2461                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2462                         break;
2463                 case OP_SEXT_I2:
2464                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2465                         break;
2466                 case CEE_MUL:
2467                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2468                         break;
2469                 case OP_MUL_IMM:
2470                         switch (ins->inst_imm) {
2471                         case 2:
2472                                 /* MOV r1, r2 */
2473                                 /* ADD r1, r1 */
2474                                 if (ins->dreg != ins->sreg1)
2475                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2476                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2477                                 break;
2478                         case 3:
2479                                 /* LEA r1, [r2 + r2*2] */
2480                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2481                                 break;
2482                         case 5:
2483                                 /* LEA r1, [r2 + r2*4] */
2484                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2485                                 break;
2486                         case 6:
2487                                 /* LEA r1, [r2 + r2*2] */
2488                                 /* ADD r1, r1          */
2489                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2490                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2491                                 break;
2492                         case 9:
2493                                 /* LEA r1, [r2 + r2*8] */
2494                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2495                                 break;
2496                         case 10:
2497                                 /* LEA r1, [r2 + r2*4] */
2498                                 /* ADD r1, r1          */
2499                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2500                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2501                                 break;
2502                         case 12:
2503                                 /* LEA r1, [r2 + r2*2] */
2504                                 /* SHL r1, 2           */
2505                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2506                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2507                                 break;
2508                         case 25:
2509                                 /* LEA r1, [r2 + r2*4] */
2510                                 /* LEA r1, [r1 + r1*4] */
2511                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2512                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2513                                 break;
2514                         case 100:
2515                                 /* LEA r1, [r2 + r2*4] */
2516                                 /* SHL r1, 2           */
2517                                 /* LEA r1, [r1 + r1*4] */
2518                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2519                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2520                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2521                                 break;
2522                         default:
2523                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2524                                 break;
2525                         }
2526                         break;
2527                 case CEE_MUL_OVF:
2528                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2529                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2530                         break;
2531                 case CEE_MUL_OVF_UN: {
2532                         /* the mul operation and the exception check should most likely be split */
2533                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2534                         /*g_assert (ins->sreg2 == X86_EAX);
2535                         g_assert (ins->dreg == X86_EAX);*/
2536                         if (ins->sreg2 == X86_EAX) {
2537                                 non_eax_reg = ins->sreg1;
2538                         } else if (ins->sreg1 == X86_EAX) {
2539                                 non_eax_reg = ins->sreg2;
2540                         } else {
2541                                 /* no need to save since we're going to store to it anyway */
2542                                 if (ins->dreg != X86_EAX) {
2543                                         saved_eax = TRUE;
2544                                         x86_push_reg (code, X86_EAX);
2545                                 }
2546                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2547                                 non_eax_reg = ins->sreg2;
2548                         }
2549                         if (ins->dreg == X86_EDX) {
2550                                 if (!saved_eax) {
2551                                         saved_eax = TRUE;
2552                                         x86_push_reg (code, X86_EAX);
2553                                 }
2554                         } else if (ins->dreg != X86_EAX) {
2555                                 saved_edx = TRUE;
2556                                 x86_push_reg (code, X86_EDX);
2557                         }
2558                         x86_mul_reg (code, non_eax_reg, FALSE);
2559                         /* save before the check since pop and mov don't change the flags */
2560                         if (ins->dreg != X86_EAX)
2561                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2562                         if (saved_edx)
2563                                 x86_pop_reg (code, X86_EDX);
2564                         if (saved_eax)
2565                                 x86_pop_reg (code, X86_EAX);
2566                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2567                         break;
2568                 }
2569                 case OP_ICONST:
2570                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2571                         break;
2572                 case OP_AOTCONST:
2573                         g_assert_not_reached ();
2574                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2575                         x86_mov_reg_imm (code, ins->dreg, 0);
2576                         break;
2577                 case OP_LOAD_GOTADDR:
2578                         x86_call_imm (code, 0);
2579                         /* 
2580                          * The patch needs to point to the pop, since the GOT offset needs 
2581                          * to be added to that address.
2582                          */
2583                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2584                         x86_pop_reg (code, ins->dreg);
2585                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2586                         break;
2587                 case OP_GOT_ENTRY:
2588                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2589                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2590                         break;
2591                 case OP_X86_PUSH_GOT_ENTRY:
2592                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2593                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2594                         break;
2595                 case CEE_CONV_I4:
2596                 case OP_MOVE:
2597                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2598                         break;
2599                 case CEE_CONV_U4:
2600                         g_assert_not_reached ();
2601                 case OP_JMP: {
2602                         /*
2603                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2604                          * Keep in sync with the code in emit_epilog.
2605                          */
2606                         int pos = 0;
2607
2608                         /* FIXME: no tracing support... */
2609                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2610                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2611                         /* reset offset to make max_len work */
2612                         offset = code - cfg->native_code;
2613
2614                         g_assert (!cfg->method->save_lmf);
2615
2616                         code = emit_load_volatile_arguments (cfg, code);
2617
2618                         if (cfg->used_int_regs & (1 << X86_EBX))
2619                                 pos -= 4;
2620                         if (cfg->used_int_regs & (1 << X86_EDI))
2621                                 pos -= 4;
2622                         if (cfg->used_int_regs & (1 << X86_ESI))
2623                                 pos -= 4;
2624                         if (pos)
2625                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2626         
2627                         if (cfg->used_int_regs & (1 << X86_ESI))
2628                                 x86_pop_reg (code, X86_ESI);
2629                         if (cfg->used_int_regs & (1 << X86_EDI))
2630                                 x86_pop_reg (code, X86_EDI);
2631                         if (cfg->used_int_regs & (1 << X86_EBX))
2632                                 x86_pop_reg (code, X86_EBX);
2633         
2634                         /* restore ESP/EBP */
2635                         x86_leave (code);
2636                         offset = code - cfg->native_code;
2637                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2638                         x86_jump32 (code, 0);
2639                         break;
2640                 }
2641                 case OP_CHECK_THIS:
2642                         /* ensure ins->sreg1 is not NULL
2643                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2644                          * cmp DWORD PTR [eax], 0
2645                          */
2646                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2647                         break;
2648                 case OP_ARGLIST: {
2649                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2650                         x86_push_reg (code, hreg);
2651                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2652                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2653                         x86_pop_reg (code, hreg);
2654                         break;
2655                 }
2656                 case OP_FCALL:
2657                 case OP_LCALL:
2658                 case OP_VCALL:
2659                 case OP_VOIDCALL:
2660                 case CEE_CALL:
2661                         call = (MonoCallInst*)ins;
2662                         if (ins->flags & MONO_INST_HAS_METHOD)
2663                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2664                         else
2665                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2666                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2667                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2668                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2669                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2670                                  * smart enough to do that optimization yet
2671                                  *
2672                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2673                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2674                                  * speedup (most likely from locality benefits). People with other processors should
2675                                  * check on theirs to see what happens.
2676                                  */
2677                                 if (call->stack_usage == 4) {
2678                                         /* we want to use registers that won't get used soon, so use
2679                                          * ecx, as eax will get allocated first. edx is used by long calls,
2680                                          * so we can't use that.
2681                                          */
2682                                         
2683                                         x86_pop_reg (code, X86_ECX);
2684                                 } else {
2685                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2686                                 }
2687                         }
2688                         code = emit_move_return_value (cfg, ins, code);
2689                         break;
2690                 case OP_FCALL_REG:
2691                 case OP_LCALL_REG:
2692                 case OP_VCALL_REG:
2693                 case OP_VOIDCALL_REG:
2694                 case OP_CALL_REG:
2695                         call = (MonoCallInst*)ins;
2696                         x86_call_reg (code, ins->sreg1);
2697                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2698                                 if (call->stack_usage == 4)
2699                                         x86_pop_reg (code, X86_ECX);
2700                                 else
2701                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2702                         }
2703                         code = emit_move_return_value (cfg, ins, code);
2704                         break;
2705                 case OP_FCALL_MEMBASE:
2706                 case OP_LCALL_MEMBASE:
2707                 case OP_VCALL_MEMBASE:
2708                 case OP_VOIDCALL_MEMBASE:
2709                 case OP_CALL_MEMBASE:
2710                         call = (MonoCallInst*)ins;
2711                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2712                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2713                                 if (call->stack_usage == 4)
2714                                         x86_pop_reg (code, X86_ECX);
2715                                 else
2716                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2717                         }
2718                         code = emit_move_return_value (cfg, ins, code);
2719                         break;
2720                 case OP_OUTARG:
2721                 case OP_X86_PUSH:
2722                         x86_push_reg (code, ins->sreg1);
2723                         break;
2724                 case OP_X86_PUSH_IMM:
2725                         x86_push_imm (code, ins->inst_imm);
2726                         break;
2727                 case OP_X86_PUSH_MEMBASE:
2728                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2729                         break;
2730                 case OP_X86_PUSH_OBJ: 
2731                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2732                         x86_push_reg (code, X86_EDI);
2733                         x86_push_reg (code, X86_ESI);
2734                         x86_push_reg (code, X86_ECX);
2735                         if (ins->inst_offset)
2736                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2737                         else
2738                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2739                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2740                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2741                         x86_cld (code);
2742                         x86_prefix (code, X86_REP_PREFIX);
2743                         x86_movsd (code);
2744                         x86_pop_reg (code, X86_ECX);
2745                         x86_pop_reg (code, X86_ESI);
2746                         x86_pop_reg (code, X86_EDI);
2747                         break;
2748                 case OP_X86_LEA:
2749                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2750                         break;
2751                 case OP_X86_LEA_MEMBASE:
2752                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2753                         break;
2754                 case OP_X86_XCHG:
2755                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2756                         break;
2757                 case OP_LOCALLOC:
2758                         /* keep alignment */
2759                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2760                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2761                         code = mono_emit_stack_alloc (code, ins);
2762                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2763                         break;
2764                 case CEE_RET:
2765                         x86_ret (code);
2766                         break;
2767                 case OP_THROW: {
2768                         x86_push_reg (code, ins->sreg1);
2769                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2770                                                           (gpointer)"mono_arch_throw_exception");
2771                         break;
2772                 }
2773                 case OP_RETHROW: {
2774                         x86_push_reg (code, ins->sreg1);
2775                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2776                                                           (gpointer)"mono_arch_rethrow_exception");
2777                         break;
2778                 }
2779                 case OP_CALL_HANDLER: 
2780                         /* Align stack */
2781 #ifdef __APPLE__
2782                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
2783 #endif
2784                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2785                         x86_call_imm (code, 0);
2786 #ifdef __APPLE__
2787                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2788 #endif
2789                         break;
2790                 case OP_LABEL:
2791                         ins->inst_c0 = code - cfg->native_code;
2792                         break;
2793                 case OP_BR:
2794                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2795                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2796                         //break;
2797                         if (ins->flags & MONO_INST_BRLABEL) {
2798                                 if (ins->inst_i0->inst_c0) {
2799                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2800                                 } else {
2801                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2802                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2803                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2804                                                 x86_jump8 (code, 0);
2805                                         else 
2806                                                 x86_jump32 (code, 0);
2807                                 }
2808                         } else {
2809                                 if (ins->inst_target_bb->native_offset) {
2810                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2811                                 } else {
2812                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2813                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2814                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2815                                                 x86_jump8 (code, 0);
2816                                         else 
2817                                                 x86_jump32 (code, 0);
2818                                 } 
2819                         }
2820                         break;
2821                 case OP_BR_REG:
2822                         x86_jump_reg (code, ins->sreg1);
2823                         break;
2824                 case OP_CEQ:
2825                 case OP_CLT:
2826                 case OP_CLT_UN:
2827                 case OP_CGT:
2828                 case OP_CGT_UN:
2829                 case OP_CNE:
2830                         x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2831                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2832                         break;
2833                 case OP_COND_EXC_EQ:
2834                 case OP_COND_EXC_NE_UN:
2835                 case OP_COND_EXC_LT:
2836                 case OP_COND_EXC_LT_UN:
2837                 case OP_COND_EXC_GT:
2838                 case OP_COND_EXC_GT_UN:
2839                 case OP_COND_EXC_GE:
2840                 case OP_COND_EXC_GE_UN:
2841                 case OP_COND_EXC_LE:
2842                 case OP_COND_EXC_LE_UN:
2843                         EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
2844                         break;
2845                 case OP_COND_EXC_OV:
2846                 case OP_COND_EXC_NO:
2847                 case OP_COND_EXC_C:
2848                 case OP_COND_EXC_NC:
2849                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2850                         break;
2851                 case CEE_BEQ:
2852                 case CEE_BNE_UN:
2853                 case CEE_BLT:
2854                 case CEE_BLT_UN:
2855                 case CEE_BGT:
2856                 case CEE_BGT_UN:
2857                 case CEE_BGE:
2858                 case CEE_BGE_UN:
2859                 case CEE_BLE:
2860                 case CEE_BLE_UN:
2861                         EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2862                         break;
2863
2864                 /* floating point opcodes */
2865                 case OP_R8CONST: {
2866                         double d = *(double *)ins->inst_p0;
2867
2868                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2869                                 x86_fldz (code);
2870                         } else if (d == 1.0) {
2871                                 x86_fld1 (code);
2872                         } else {
2873                                 if (cfg->compile_aot) {
2874                                         guint32 *val = (guint32*)&d;
2875                                         x86_push_imm (code, val [1]);
2876                                         x86_push_imm (code, val [0]);
2877                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2878                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2879                                 }
2880                                 else {
2881                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2882                                         x86_fld (code, NULL, TRUE);
2883                                 }
2884                         }
2885                         break;
2886                 }
2887                 case OP_R4CONST: {
2888                         float f = *(float *)ins->inst_p0;
2889
2890                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2891                                 x86_fldz (code);
2892                         } else if (f == 1.0) {
2893                                 x86_fld1 (code);
2894                         } else {
2895                                 if (cfg->compile_aot) {
2896                                         guint32 val = *(guint32*)&f;
2897                                         x86_push_imm (code, val);
2898                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2899                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2900                                 }
2901                                 else {
2902                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2903                                         x86_fld (code, NULL, FALSE);
2904                                 }
2905                         }
2906                         break;
2907                 }
2908                 case OP_STORER8_MEMBASE_REG:
2909                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2910                         break;
2911                 case OP_LOADR8_SPILL_MEMBASE:
2912                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2913                         x86_fxch (code, 1);
2914                         break;
2915                 case OP_LOADR8_MEMBASE:
2916                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2917                         break;
2918                 case OP_STORER4_MEMBASE_REG:
2919                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2920                         break;
2921                 case OP_LOADR4_MEMBASE:
2922                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2923                         break;
2924                 case CEE_CONV_R4: /* FIXME: change precision */
2925                 case CEE_CONV_R8:
2926                         x86_push_reg (code, ins->sreg1);
2927                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2928                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2929                         break;
2930                 case OP_X86_FP_LOAD_I8:
2931                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2932                         break;
2933                 case OP_X86_FP_LOAD_I4:
2934                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2935                         break;
2936                 case OP_FCONV_TO_I1:
2937                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2938                         break;
2939                 case OP_FCONV_TO_U1:
2940                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2941                         break;
2942                 case OP_FCONV_TO_I2:
2943                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2944                         break;
2945                 case OP_FCONV_TO_U2:
2946                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2947                         break;
2948                 case OP_FCONV_TO_I4:
2949                 case OP_FCONV_TO_I:
2950                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2951                         break;
2952                 case OP_FCONV_TO_I8:
2953                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2954                         x86_fnstcw_membase(code, X86_ESP, 0);
2955                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2956                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2957                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2958                         x86_fldcw_membase (code, X86_ESP, 2);
2959                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2960                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2961                         x86_pop_reg (code, ins->dreg);
2962                         x86_pop_reg (code, ins->backend.reg3);
2963                         x86_fldcw_membase (code, X86_ESP, 0);
2964                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2965                         break;
2966                 case OP_LCONV_TO_R_UN: { 
2967                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2968                         guint8 *br;
2969
2970                         /* load 64bit integer to FP stack */
2971                         x86_push_imm (code, 0);
2972                         x86_push_reg (code, ins->sreg2);
2973                         x86_push_reg (code, ins->sreg1);
2974                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2975                         /* store as 80bit FP value */
2976                         x86_fst80_membase (code, X86_ESP, 0);
2977                         
2978                         /* test if lreg is negative */
2979                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2980                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2981         
2982                         /* add correction constant mn */
2983                         x86_fld80_mem (code, mn);
2984                         x86_fld80_membase (code, X86_ESP, 0);
2985                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2986                         x86_fst80_membase (code, X86_ESP, 0);
2987
2988                         x86_patch (br, code);
2989
2990                         x86_fld80_membase (code, X86_ESP, 0);
2991                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2992
2993                         break;
2994                 }
2995                 case OP_LCONV_TO_OVF_I: {
2996                         guint8 *br [3], *label [1];
2997                         MonoInst *tins;
2998
2999                         /* 
3000                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
3001                          */
3002                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3003
3004                         /* If the low word top bit is set, see if we are negative */
3005                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3006                         /* We are not negative (no top bit set): check that our top word is zero */
3007                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3008                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3009                         label [0] = code;
3010
3011                         /* throw exception */
3012                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
3013                         if (tins) {
3014                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
3015                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
3016                                         x86_jump8 (code, 0);
3017                                 else
3018                                         x86_jump32 (code, 0);
3019                         } else {
3020                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3021                                 x86_jump32 (code, 0);
3022                         }
3023         
3024         
3025                         x86_patch (br [0], code);
3026                         /* our top bit is set, check that top word is 0xffffffff */
3027                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3028                 
3029                         x86_patch (br [1], code);
3030                         /* nope, emit exception */
3031                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3032                         x86_patch (br [2], label [0]);
3033
3034                         if (ins->dreg != ins->sreg1)
3035                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3036                         break;
3037                 }
3038                 case OP_FADD:
3039                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3040                         break;
3041                 case OP_FSUB:
3042                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3043                         break;          
3044                 case OP_FMUL:
3045                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3046                         break;          
3047                 case OP_FDIV:
3048                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3049                         break;          
3050                 case OP_FNEG:
3051                         x86_fchs (code);
3052                         break;          
3053                 case OP_SIN:
3054                         x86_fsin (code);
3055                         x86_fldz (code);
3056                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3057                         break;          
3058                 case OP_COS:
3059                         x86_fcos (code);
3060                         x86_fldz (code);
3061                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3062                         break;          
3063                 case OP_ABS:
3064                         x86_fabs (code);
3065                         break;          
3066                 case OP_TAN: {
3067                         /* 
3068                          * it really doesn't make sense to inline all this code,
3069                          * it's here just to show that things may not be as simple 
3070                          * as they appear.
3071                          */
3072                         guchar *check_pos, *end_tan, *pop_jump;
3073                         x86_push_reg (code, X86_EAX);
3074                         x86_fptan (code);
3075                         x86_fnstsw (code);
3076                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3077                         check_pos = code;
3078                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3079                         x86_fstp (code, 0); /* pop the 1.0 */
3080                         end_tan = code;
3081                         x86_jump8 (code, 0);
3082                         x86_fldpi (code);
3083                         x86_fp_op (code, X86_FADD, 0);
3084                         x86_fxch (code, 1);
3085                         x86_fprem1 (code);
3086                         x86_fstsw (code);
3087                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3088                         pop_jump = code;
3089                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3090                         x86_fstp (code, 1);
3091                         x86_fptan (code);
3092                         x86_patch (pop_jump, code);
3093                         x86_fstp (code, 0); /* pop the 1.0 */
3094                         x86_patch (check_pos, code);
3095                         x86_patch (end_tan, code);
3096                         x86_fldz (code);
3097                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3098                         x86_pop_reg (code, X86_EAX);
3099                         break;
3100                 }
3101                 case OP_ATAN:
3102                         x86_fld1 (code);
3103                         x86_fpatan (code);
3104                         x86_fldz (code);
3105                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3106                         break;          
3107                 case OP_SQRT:
3108                         x86_fsqrt (code);
3109                         break;          
3110                 case OP_X86_FPOP:
3111                         x86_fstp (code, 0);
3112                         break;          
3113                 case OP_FREM: {
3114                         guint8 *l1, *l2;
3115
3116                         x86_push_reg (code, X86_EAX);
3117                         /* we need to exchange ST(0) with ST(1) */
3118                         x86_fxch (code, 1);
3119
3120                         /* this requires a loop, because fprem sometimes 
3121                          * returns a partial remainder */
3122                         l1 = code;
3123                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3124                         /* x86_fprem1 (code); */
3125                         x86_fprem (code);
3126                         x86_fnstsw (code);
3127                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3128                         l2 = code + 2;
3129                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3130
3131                         /* pop result */
3132                         x86_fstp (code, 1);
3133
3134                         x86_pop_reg (code, X86_EAX);
3135                         break;
3136                 }
3137                 case OP_FCOMPARE:
3138                         if (cfg->opt & MONO_OPT_FCMOV) {
3139                                 x86_fcomip (code, 1);
3140                                 x86_fstp (code, 0);
3141                                 break;
3142                         }
3143                         /* this overwrites EAX */
3144                         EMIT_FPCOMPARE(code);
3145                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3146                         break;
3147                 case OP_FCEQ:
3148                         if (cfg->opt & MONO_OPT_FCMOV) {
3149                                 /* zeroing the register at the start results in 
3150                                  * shorter and faster code (we can also remove the widening op)
3151                                  */
3152                                 guchar *unordered_check;
3153                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3154                                 x86_fcomip (code, 1);
3155                                 x86_fstp (code, 0);
3156                                 unordered_check = code;
3157                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3158                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3159                                 x86_patch (unordered_check, code);
3160                                 break;
3161                         }
3162                         if (ins->dreg != X86_EAX) 
3163                                 x86_push_reg (code, X86_EAX);
3164
3165                         EMIT_FPCOMPARE(code);
3166                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3167                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3168                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3169                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3170
3171                         if (ins->dreg != X86_EAX) 
3172                                 x86_pop_reg (code, X86_EAX);
3173                         break;
3174                 case OP_FCLT:
3175                 case OP_FCLT_UN:
3176                         if (cfg->opt & MONO_OPT_FCMOV) {
3177                                 /* zeroing the register at the start results in 
3178                                  * shorter and faster code (we can also remove the widening op)
3179                                  */
3180                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3181                                 x86_fcomip (code, 1);
3182                                 x86_fstp (code, 0);
3183                                 if (ins->opcode == OP_FCLT_UN) {
3184                                         guchar *unordered_check = code;
3185                                         guchar *jump_to_end;
3186                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3187                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3188                                         jump_to_end = code;
3189                                         x86_jump8 (code, 0);
3190                                         x86_patch (unordered_check, code);
3191                                         x86_inc_reg (code, ins->dreg);
3192                                         x86_patch (jump_to_end, code);
3193                                 } else {
3194                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3195                                 }
3196                                 break;
3197                         }
3198                         if (ins->dreg != X86_EAX) 
3199                                 x86_push_reg (code, X86_EAX);
3200
3201                         EMIT_FPCOMPARE(code);
3202                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3203                         if (ins->opcode == OP_FCLT_UN) {
3204                                 guchar *is_not_zero_check, *end_jump;
3205                                 is_not_zero_check = code;
3206                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3207                                 end_jump = code;
3208                                 x86_jump8 (code, 0);
3209                                 x86_patch (is_not_zero_check, code);
3210                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3211
3212                                 x86_patch (end_jump, code);
3213                         }
3214                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3215                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3216
3217                         if (ins->dreg != X86_EAX) 
3218                                 x86_pop_reg (code, X86_EAX);
3219                         break;
3220                 case OP_FCGT:
3221                 case OP_FCGT_UN:
3222                         if (cfg->opt & MONO_OPT_FCMOV) {
3223                                 /* zeroing the register at the start results in 
3224                                  * shorter and faster code (we can also remove the widening op)
3225                                  */
3226                                 guchar *unordered_check;
3227                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3228                                 x86_fcomip (code, 1);
3229                                 x86_fstp (code, 0);
3230                                 if (ins->opcode == OP_FCGT) {
3231                                         unordered_check = code;
3232                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3233                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3234                                         x86_patch (unordered_check, code);
3235                                 } else {
3236                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3237                                 }
3238                                 break;
3239                         }
3240                         if (ins->dreg != X86_EAX) 
3241                                 x86_push_reg (code, X86_EAX);
3242
3243                         EMIT_FPCOMPARE(code);
3244                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3245                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3246                         if (ins->opcode == OP_FCGT_UN) {
3247                                 guchar *is_not_zero_check, *end_jump;
3248                                 is_not_zero_check = code;
3249                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3250                                 end_jump = code;
3251                                 x86_jump8 (code, 0);
3252                                 x86_patch (is_not_zero_check, code);
3253                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3254         
3255                                 x86_patch (end_jump, code);
3256                         }
3257                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3258                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3259
3260                         if (ins->dreg != X86_EAX) 
3261                                 x86_pop_reg (code, X86_EAX);
3262                         break;
3263                 case OP_FBEQ:
3264                         if (cfg->opt & MONO_OPT_FCMOV) {
3265                                 guchar *jump = code;
3266                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3267                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3268                                 x86_patch (jump, code);
3269                                 break;
3270                         }
3271                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3272                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3273                         break;
3274                 case OP_FBNE_UN:
3275                         /* Branch if C013 != 100 */
3276                         if (cfg->opt & MONO_OPT_FCMOV) {
3277                                 /* branch if !ZF or (PF|CF) */
3278                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3279                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3280                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3281                                 break;
3282                         }
3283                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3284                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3285                         break;
3286                 case OP_FBLT:
3287                         if (cfg->opt & MONO_OPT_FCMOV) {
3288                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3289                                 break;
3290                         }
3291                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3292                         break;
3293                 case OP_FBLT_UN:
3294                         if (cfg->opt & MONO_OPT_FCMOV) {
3295                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3296                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3297                                 break;
3298                         }
3299                         if (ins->opcode == OP_FBLT_UN) {
3300                                 guchar *is_not_zero_check, *end_jump;
3301                                 is_not_zero_check = code;
3302                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3303                                 end_jump = code;
3304                                 x86_jump8 (code, 0);
3305                                 x86_patch (is_not_zero_check, code);
3306                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3307
3308                                 x86_patch (end_jump, code);
3309                         }
3310                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3311                         break;
3312                 case OP_FBGT:
3313                 case OP_FBGT_UN:
3314                         if (cfg->opt & MONO_OPT_FCMOV) {
3315                                 if (ins->opcode == OP_FBGT) {
3316                                         guchar *br1;
3317
3318                                         /* skip branch if C1=1 */
3319                                         br1 = code;
3320                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3321                                         /* branch if (C0 | C3) = 1 */
3322                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3323                                         x86_patch (br1, code);
3324                                 } else {
3325                                         EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3326                                 }
3327                                 break;
3328                         }
3329                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3330                         if (ins->opcode == OP_FBGT_UN) {
3331                                 guchar *is_not_zero_check, *end_jump;
3332                                 is_not_zero_check = code;
3333                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3334                                 end_jump = code;
3335                                 x86_jump8 (code, 0);
3336                                 x86_patch (is_not_zero_check, code);
3337                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3338
3339                                 x86_patch (end_jump, code);
3340                         }
3341                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3342                         break;
3343                 case OP_FBGE:
3344                         /* Branch if C013 == 100 or 001 */
3345                         if (cfg->opt & MONO_OPT_FCMOV) {
3346                                 guchar *br1;
3347
3348                                 /* skip branch if C1=1 */
3349                                 br1 = code;
3350                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3351                                 /* branch if (C0 | C3) = 1 */
3352                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3353                                 x86_patch (br1, code);
3354                                 break;
3355                         }
3356                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3357                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3358                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3359                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3360                         break;
3361                 case OP_FBGE_UN:
3362                         /* Branch if C013 == 000 */
3363                         if (cfg->opt & MONO_OPT_FCMOV) {
3364                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3365                                 break;
3366                         }
3367                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3368                         break;
3369                 case OP_FBLE:
3370                         /* Branch if C013=000 or 100 */
3371                         if (cfg->opt & MONO_OPT_FCMOV) {
3372                                 guchar *br1;
3373
3374                                 /* skip branch if C1=1 */
3375                                 br1 = code;
3376                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3377                                 /* branch if C0=0 */
3378                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3379                                 x86_patch (br1, code);
3380                                 break;
3381                         }
3382                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3383                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3384                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3385                         break;
3386                 case OP_FBLE_UN:
3387                         /* Branch if C013 != 001 */
3388                         if (cfg->opt & MONO_OPT_FCMOV) {
3389                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3390                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3391                                 break;
3392                         }
3393                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3394                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3395                         break;
3396                 case OP_CKFINITE: {
3397                         x86_push_reg (code, X86_EAX);
3398                         x86_fxam (code);
3399                         x86_fnstsw (code);
3400                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3401                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3402                         x86_pop_reg (code, X86_EAX);
3403                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3404                         break;
3405                 }
3406                 case OP_TLS_GET: {
3407                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3408                         break;
3409                 }
3410                 case OP_MEMORY_BARRIER: {
3411                         /* Not needed on x86 */
3412                         break;
3413                 }
3414                 case OP_ATOMIC_ADD_I4: {
3415                         int dreg = ins->dreg;
3416
3417                         if (dreg == ins->inst_basereg) {
3418                                 x86_push_reg (code, ins->sreg2);
3419                                 dreg = ins->sreg2;
3420                         } 
3421                         
3422                         if (dreg != ins->sreg2)
3423                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3424
3425                         x86_prefix (code, X86_LOCK_PREFIX);
3426                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3427
3428                         if (dreg != ins->dreg) {
3429                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3430                                 x86_pop_reg (code, dreg);
3431                         }
3432
3433                         break;
3434                 }
3435                 case OP_ATOMIC_ADD_NEW_I4: {
3436                         int dreg = ins->dreg;
3437
3438                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3439                         if (ins->sreg2 == dreg) {
3440                                 if (dreg == X86_EBX) {
3441                                         dreg = X86_EDI;
3442                                         if (ins->inst_basereg == X86_EDI)
3443                                                 dreg = X86_ESI;
3444                                 } else {
3445                                         dreg = X86_EBX;
3446                                         if (ins->inst_basereg == X86_EBX)
3447                                                 dreg = X86_EDI;
3448                                 }
3449                         } else if (ins->inst_basereg == dreg) {
3450                                 if (dreg == X86_EBX) {
3451                                         dreg = X86_EDI;
3452                                         if (ins->sreg2 == X86_EDI)
3453                                                 dreg = X86_ESI;
3454                                 } else {
3455                                         dreg = X86_EBX;
3456                                         if (ins->sreg2 == X86_EBX)
3457                                                 dreg = X86_EDI;
3458                                 }
3459                         }
3460
3461                         if (dreg != ins->dreg) {
3462                                 x86_push_reg (code, dreg);
3463                         }
3464
3465                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3466                         x86_prefix (code, X86_LOCK_PREFIX);
3467                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3468                         /* dreg contains the old value, add with sreg2 value */
3469                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3470                         
3471                         if (ins->dreg != dreg) {
3472                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3473                                 x86_pop_reg (code, dreg);
3474                         }
3475
3476                         break;
3477                 }
3478                 case OP_ATOMIC_EXCHANGE_I4: {
3479                         guchar *br[2];
3480                         int sreg2 = ins->sreg2;
3481                         int breg = ins->inst_basereg;
3482
3483                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3484                          * hack to overcome limits in x86 reg allocator 
3485                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3486                          */
3487                         if (ins->dreg != X86_EAX)
3488                                 x86_push_reg (code, X86_EAX);
3489                         
3490                         /* We need the EAX reg for the cmpxchg */
3491                         if (ins->sreg2 == X86_EAX) {
3492                                 x86_push_reg (code, X86_EDX);
3493                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3494                                 sreg2 = X86_EDX;
3495                         }
3496
3497                         if (breg == X86_EAX) {
3498                                 x86_push_reg (code, X86_ESI);
3499                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3500                                 breg = X86_ESI;
3501                         }
3502
3503                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3504
3505                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3506                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3507                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3508                         x86_patch (br [1], br [0]);
3509
3510                         if (breg != ins->inst_basereg)
3511                                 x86_pop_reg (code, X86_ESI);
3512
3513                         if (ins->dreg != X86_EAX) {
3514                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3515                                 x86_pop_reg (code, X86_EAX);
3516                         }
3517
3518                         if (ins->sreg2 != sreg2)
3519                                 x86_pop_reg (code, X86_EDX);
3520
3521                         break;
3522                 }
3523                 default:
3524                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3525                         g_assert_not_reached ();
3526                 }
3527
3528                 if ((code - cfg->native_code - offset) > max_len) {
3529                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3530                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3531                         g_assert_not_reached ();
3532                 }
3533                
3534                 cpos += max_len;
3535
3536                 last_ins = ins;
3537                 last_offset = offset;
3538                 
3539                 ins = ins->next;
3540         }
3541
3542         cfg->code_len = code - cfg->native_code;
3543 }
3544
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Architecture hook with no work to do in this backend: the body is
 * intentionally empty and the function simply returns.
 */
void
mono_arch_register_lowlevel_calls (void)
{
	/* Nothing to register here. */
}
3549
3550 void
3551 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3552 {
3553         MonoJumpInfo *patch_info;
3554         gboolean compile_aot = !run_cctors;
3555
3556         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3557                 unsigned char *ip = patch_info->ip.i + code;
3558                 const unsigned char *target;
3559
3560                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3561
3562                 if (compile_aot) {
3563                         switch (patch_info->type) {
3564                         case MONO_PATCH_INFO_BB:
3565                         case MONO_PATCH_INFO_LABEL:
3566                                 break;
3567                         default:
3568                                 /* No need to patch these */
3569                                 continue;
3570                         }
3571                 }
3572
3573                 switch (patch_info->type) {
3574                 case MONO_PATCH_INFO_IP:
3575                         *((gconstpointer *)(ip)) = target;
3576                         break;
3577                 case MONO_PATCH_INFO_CLASS_INIT: {
3578                         guint8 *code = ip;
3579                         /* Might already been changed to a nop */
3580                         x86_call_code (code, 0);
3581                         x86_patch (ip, target);
3582                         break;
3583                 }
3584                 case MONO_PATCH_INFO_ABS:
3585                 case MONO_PATCH_INFO_METHOD:
3586                 case MONO_PATCH_INFO_METHOD_JUMP:
3587                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3588                 case MONO_PATCH_INFO_BB:
3589                 case MONO_PATCH_INFO_LABEL:
3590                         x86_patch (ip, target);
3591                         break;
3592                 case MONO_PATCH_INFO_NONE:
3593                         break;
3594                 default: {
3595                         guint32 offset = mono_arch_get_patch_offset (ip);
3596                         *((gconstpointer *)(ip + offset)) = target;
3597                         break;
3598                 }
3599                 }
3600         }
3601 }
3602
3603 guint8 *
3604 mono_arch_emit_prolog (MonoCompile *cfg)
3605 {
3606         MonoMethod *method = cfg->method;
3607         MonoBasicBlock *bb;
3608         MonoMethodSignature *sig;
3609         MonoInst *inst;
3610         int alloc_size, pos, max_offset, i;
3611         guint8 *code;
3612
3613         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 1024);
3614
3615         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3616                 cfg->code_size += 512;
3617
3618         code = cfg->native_code = g_malloc (cfg->code_size);
3619
3620         x86_push_reg (code, X86_EBP);
3621         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3622
3623         alloc_size = cfg->stack_offset;
3624         pos = 0;
3625
3626         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3627                 /* Might need to attach the thread to the JIT  or change the domain for the callback */
3628                 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
3629                         guint8 *buf, *no_domain_branch;
3630
3631                         code = emit_tls_get (code, X86_EAX, appdomain_tls_offset);
3632                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
3633                         no_domain_branch = code;
3634                         x86_branch8 (code, X86_CC_NE, 0, 0);
3635                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3636                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3637                         buf = code;
3638                         x86_branch8 (code, X86_CC_NE, 0, 0);
3639                         x86_patch (no_domain_branch, code);
3640                         x86_push_imm (code, cfg->domain);
3641                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3642                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3643                         x86_patch (buf, code);
3644 #ifdef PLATFORM_WIN32
3645                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3646                         /* FIXME: Add a separate key for LMF to avoid this */
3647                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3648 #endif
3649                 } else {
3650                         g_assert (!cfg->compile_aot);
3651                         x86_push_imm (code, cfg->domain);
3652                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3653                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3654                 }
3655         }
3656
3657         if (method->save_lmf) {
3658                 pos += sizeof (MonoLMF);
3659
3660                 /* save the current IP */
3661                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3662                 x86_push_imm_template (code);
3663
3664                 /* save all caller saved regs */
3665                 x86_push_reg (code, X86_EBP);
3666                 x86_push_reg (code, X86_ESI);
3667                 x86_push_reg (code, X86_EDI);
3668                 x86_push_reg (code, X86_EBX);
3669
3670                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3671                         /*
3672                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3673                          * through the mono_lmf_addr TLS variable.
3674                          */
3675                         /* %eax = previous_lmf */
3676                         x86_prefix (code, X86_GS_PREFIX);
3677                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3678                         /* skip esp + method_info + lmf */
3679                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
3680                         /* push previous_lmf */
3681                         x86_push_reg (code, X86_EAX);
3682                         /* new lmf = ESP */
3683                         x86_prefix (code, X86_GS_PREFIX);
3684                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
3685                 } else {
3686                         /* get the address of lmf for the current thread */
3687                         /* 
3688                          * This is performance critical so we try to use some tricks to make
3689                          * it fast.
3690                          */                                                                        
3691
3692                         if (lmf_addr_tls_offset != -1) {
3693                                 /* Load lmf quicky using the GS register */
3694                                 code = emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
3695 #ifdef PLATFORM_WIN32
3696                                 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3697                                 /* FIXME: Add a separate key for LMF to avoid this */
3698                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3699 #endif
3700                         } else {
3701                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3702                         }
3703
3704                         /* Skip esp + method info */
3705                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3706
3707                         /* push lmf */
3708                         x86_push_reg (code, X86_EAX); 
3709                         /* push *lfm (previous_lmf) */
3710                         x86_push_membase (code, X86_EAX, 0);
3711                         /* *(lmf) = ESP */
3712                         x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3713                 }
3714         } else {
3715
3716                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3717                         x86_push_reg (code, X86_EBX);
3718                         pos += 4;
3719                 }
3720
3721                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3722                         x86_push_reg (code, X86_EDI);
3723                         pos += 4;
3724                 }
3725
3726                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3727                         x86_push_reg (code, X86_ESI);
3728                         pos += 4;
3729                 }
3730         }
3731
3732         alloc_size -= pos;
3733
3734 #if __APPLE__
3735         /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
3736         {
3737                 int tot = alloc_size + pos + 4 + 4; /* ret ip + ebp */
3738                 if (tot & 4) {
3739                         tot += 4;
3740                         alloc_size += 4;
3741                 }
3742                 if (tot & 8) {
3743                         alloc_size += 8;
3744                 }
3745         }
3746 #endif
3747
3748         if (alloc_size) {
3749                 /* See mono_emit_stack_alloc */
3750 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3751                 guint32 remaining_size = alloc_size;
3752                 while (remaining_size >= 0x1000) {
3753                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3754                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3755                         remaining_size -= 0x1000;
3756                 }
3757                 if (remaining_size)
3758                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3759 #else
3760                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3761 #endif
3762         }
3763
3764 #if __APPLE_
3765         /* check the stack is aligned */
3766         x86_mov_reg_reg (code, X86_EDX, X86_ESP, 4);
3767         x86_alu_reg_imm (code, X86_AND, X86_EDX, 15);
3768         x86_alu_reg_imm (code, X86_CMP, X86_EDX, 0);
3769         x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
3770         x86_breakpoint (code);
3771 #endif
3772
3773         /* compute max_offset in order to use short forward jumps */
3774         max_offset = 0;
3775         if (cfg->opt & MONO_OPT_BRANCH) {
3776                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3777                         MonoInst *ins = bb->code;
3778                         bb->max_offset = max_offset;
3779
3780                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3781                                 max_offset += 6;
3782                         /* max alignment for loops */
3783                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3784                                 max_offset += LOOP_ALIGNMENT;
3785
3786                         while (ins) {
3787                                 if (ins->opcode == OP_LABEL)
3788                                         ins->inst_c1 = max_offset;
3789                                 
3790                                 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
3791                                 ins = ins->next;
3792                         }
3793                 }
3794         }
3795
3796         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3797                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3798
3799         /* load arguments allocated to register from the stack */
3800         sig = mono_method_signature (method);
3801         pos = 0;
3802
3803         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3804                 inst = cfg->args [pos];
3805                 if (inst->opcode == OP_REGVAR) {
3806                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3807                         if (cfg->verbose_level > 2)
3808                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3809                 }
3810                 pos++;
3811         }
3812
3813         cfg->code_len = code - cfg->native_code;
3814
3815         return code;
3816 }
3817
3818 void
3819 mono_arch_emit_epilog (MonoCompile *cfg)
3820 {
3821         MonoMethod *method = cfg->method;
3822         MonoMethodSignature *sig = mono_method_signature (method);
3823         int quad, pos;
3824         guint32 stack_to_pop;
3825         guint8 *code;
3826         int max_epilog_size = 16;
3827         CallInfo *cinfo;
3828         
3829         if (cfg->method->save_lmf)
3830                 max_epilog_size += 128;
3831
3832         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3833                 cfg->code_size *= 2;
3834                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3835                 mono_jit_stats.code_reallocs++;
3836         }
3837
3838         code = cfg->native_code + cfg->code_len;
3839
3840         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3841                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3842
3843         /* the code restoring the registers must be kept in sync with OP_JMP */
3844         pos = 0;
3845         
3846         if (method->save_lmf) {
3847                 gint32 prev_lmf_reg;
3848                 gint32 lmf_offset = -sizeof (MonoLMF);
3849
3850                 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
3851                         /*
3852                          * Optimized version which uses the mono_lmf TLS variable instead of indirection
3853                          * through the mono_lmf_addr TLS variable.
3854                          */
3855                         /* reg = previous_lmf */
3856                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3857
3858                         /* lmf = previous_lmf */
3859                         x86_prefix (code, X86_GS_PREFIX);
3860                         x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
3861                 } else {
3862                         /* Find a spare register */
3863                         switch (sig->ret->type) {
3864                         case MONO_TYPE_I8:
3865                         case MONO_TYPE_U8:
3866                                 prev_lmf_reg = X86_EDI;
3867                                 cfg->used_int_regs |= (1 << X86_EDI);
3868                                 break;
3869                         default:
3870                                 prev_lmf_reg = X86_EDX;
3871                                 break;
3872                         }
3873
3874                         /* reg = previous_lmf */
3875                         x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3876
3877                         /* ecx = lmf */
3878                         x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
3879
3880                         /* *(lmf) = previous_lmf */
3881                         x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3882                 }
3883
3884                 /* restore caller saved regs */
3885                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3886                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
3887                 }
3888
3889                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3890                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
3891                 }
3892                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3893                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
3894                 }
3895
3896                 /* EBP is restored by LEAVE */
3897         } else {
3898                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3899                         pos -= 4;
3900                 }
3901                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3902                         pos -= 4;
3903                 }
3904                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3905                         pos -= 4;
3906                 }
3907
3908                 if (pos)
3909                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3910
3911                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3912                         x86_pop_reg (code, X86_ESI);
3913                 }
3914                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3915                         x86_pop_reg (code, X86_EDI);
3916                 }
3917                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3918                         x86_pop_reg (code, X86_EBX);
3919                 }
3920         }
3921
3922         /* Load returned vtypes into registers if needed */
3923         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
3924         if (cinfo->ret.storage == ArgValuetypeInReg) {
3925                 for (quad = 0; quad < 2; quad ++) {
3926                         switch (cinfo->ret.pair_storage [quad]) {
3927                         case ArgInIReg:
3928                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
3929                                 break;
3930                         case ArgOnFloatFpStack:
3931                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
3932                                 break;
3933                         case ArgOnDoubleFpStack:
3934                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
3935                                 break;
3936                         case ArgNone:
3937                                 break;
3938                         default:
3939                                 g_assert_not_reached ();
3940                         }
3941                 }
3942         }
3943
3944         x86_leave (code);
3945
3946         if (CALLCONV_IS_STDCALL (sig)) {
3947                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3948
3949                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3950         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
3951                 stack_to_pop = 4;
3952         else
3953                 stack_to_pop = 0;
3954
3955         if (stack_to_pop)
3956                 x86_ret_imm (code, stack_to_pop);
3957         else
3958                 x86_ret (code);
3959
3960         cfg->code_len = code - cfg->native_code;
3961
3962         g_assert (cfg->code_len < cfg->code_size);
3963 }
3964
3965 void
3966 mono_arch_emit_exceptions (MonoCompile *cfg)
3967 {
3968         MonoJumpInfo *patch_info;
3969         int nthrows, i;
3970         guint8 *code;
3971         MonoClass *exc_classes [16];
3972         guint8 *exc_throw_start [16], *exc_throw_end [16];
3973         guint32 code_size;
3974         int exc_count = 0;
3975
3976         /* Compute needed space */
3977         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3978                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3979                         exc_count++;
3980         }
3981
3982         /* 
3983          * make sure we have enough space for exceptions
3984          * 16 is the size of two push_imm instructions and a call
3985          */
3986         if (cfg->compile_aot)
3987                 code_size = exc_count * 32;
3988         else
3989                 code_size = exc_count * 16;
3990
3991         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
3992                 cfg->code_size *= 2;
3993                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3994                 mono_jit_stats.code_reallocs++;
3995         }
3996
3997         code = cfg->native_code + cfg->code_len;
3998
3999         nthrows = 0;
4000         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4001                 switch (patch_info->type) {
4002                 case MONO_PATCH_INFO_EXC: {
4003                         MonoClass *exc_class;
4004                         guint8 *buf, *buf2;
4005                         guint32 throw_ip;
4006
4007                         x86_patch (patch_info->ip.i + cfg->native_code, code);
4008
4009                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4010                         g_assert (exc_class);
4011                         throw_ip = patch_info->ip.i;
4012
4013                         /* Find a throw sequence for the same exception class */
4014                         for (i = 0; i < nthrows; ++i)
4015                                 if (exc_classes [i] == exc_class)
4016                                         break;
4017                         if (i < nthrows) {
4018                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4019                                 x86_jump_code (code, exc_throw_start [i]);
4020                                 patch_info->type = MONO_PATCH_INFO_NONE;
4021                         }
4022                         else {
4023                                 guint32 size;
4024
4025                                 /* Compute size of code following the push <OFFSET> */
4026                                 size = 5 + 5;
4027
4028                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
4029                                         /* Use the shorter form */
4030                                         buf = buf2 = code;
4031                                         x86_push_imm (code, 0);
4032                                 }
4033                                 else {
4034                                         buf = code;
4035                                         x86_push_imm (code, 0xf0f0f0f0);
4036                                         buf2 = code;
4037                                 }
4038
4039                                 if (nthrows < 16) {
4040                                         exc_classes [nthrows] = exc_class;
4041                                         exc_throw_start [nthrows] = code;
4042                                 }
4043
4044                                 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
4045                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
4046                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4047                                 patch_info->ip.i = code - cfg->native_code;
4048                                 x86_call_code (code, 0);
4049                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
4050                                 while (buf < buf2)
4051                                         x86_nop (buf);
4052
4053                                 if (nthrows < 16) {
4054                                         exc_throw_end [nthrows] = code;
4055                                         nthrows ++;
4056                                 }
4057                         }
4058                         break;
4059                 }
4060                 default:
4061                         /* do nothing */
4062                         break;
4063                 }
4064         }
4065
4066         cfg->code_len = code - cfg->native_code;
4067
4068         g_assert (cfg->code_len < cfg->code_size);
4069 }
4070
4071 void
4072 mono_arch_flush_icache (guint8 *code, gint size)
4073 {
4074         /* not needed */
4075 }
4076
/*
 * mono_arch_flush_register_windows:
 *
 *   Intentionally empty in this backend.
 */
void
mono_arch_flush_register_windows (void)
{
	/* nothing to do */
}
4081
4082 /*
4083  * Support for fast access to the thread-local lmf structure using the GS
4084  * segment register on NPTL + kernel 2.6.x.
4085  */
4086
4087 static gboolean tls_offset_inited = FALSE;
4088
4089 void
4090 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4091 {
4092         if (!tls_offset_inited) {
4093                 if (!getenv ("MONO_NO_TLS")) {
4094 #ifdef PLATFORM_WIN32
4095                         /* 
4096                          * We need to init this multiple times, since when we are first called, the key might not
4097                          * be initialized yet.
4098                          */
4099                         appdomain_tls_offset = mono_domain_get_tls_key ();
4100                         lmf_tls_offset = mono_get_jit_tls_key ();
4101                         thread_tls_offset = mono_thread_get_tls_key ();
4102
4103                         /* Only 64 tls entries can be accessed using inline code */
4104                         if (appdomain_tls_offset >= 64)
4105                                 appdomain_tls_offset = -1;
4106                         if (lmf_tls_offset >= 64)
4107                                 lmf_tls_offset = -1;
4108                         if (thread_tls_offset >= 64)
4109                                 thread_tls_offset = -1;
4110 #else
4111 #if MONO_XEN_OPT
4112                         optimize_for_xen = access ("/proc/xen", F_OK) == 0;
4113 #endif
4114                         tls_offset_inited = TRUE;
4115                         appdomain_tls_offset = mono_domain_get_tls_offset ();
4116                         lmf_tls_offset = mono_get_lmf_tls_offset ();
4117                         lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
4118                         thread_tls_offset = mono_thread_get_tls_offset ();
4119 #endif
4120                 }
4121         }               
4122 }
4123
4124 void
4125 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4126 {
4127 }
4128
4129 void
4130 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4131 {
4132         MonoCallInst *call = (MonoCallInst*)inst;
4133         CallInfo *cinfo = get_call_info (cfg, cfg->mempool, inst->signature, FALSE);
4134
4135         /* add the this argument */
4136         if (this_reg != -1) {
4137                 if (cinfo->args [0].storage == ArgInIReg) {
4138                         MonoInst *this;
4139                         MONO_INST_NEW (cfg, this, OP_MOVE);
4140                         this->type = this_type;
4141                         this->sreg1 = this_reg;
4142                         this->dreg = mono_regstate_next_int (cfg->rs);
4143                         mono_bblock_add_inst (cfg->cbb, this);
4144
4145                         mono_call_inst_add_outarg_reg (cfg, call, this->dreg, cinfo->args [0].reg, FALSE);
4146                 }
4147                 else {
4148                         MonoInst *this;
4149                         MONO_INST_NEW (cfg, this, OP_OUTARG);
4150                         this->type = this_type;
4151                         this->sreg1 = this_reg;
4152                         mono_bblock_add_inst (cfg->cbb, this);
4153                 }
4154         }
4155
4156         if (vt_reg != -1) {
4157                 MonoInst *vtarg;
4158
4159                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4160                         /*
4161                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4162                          * the stack. Save the address here, so the call instruction can
4163                          * access it.
4164                          */
4165                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4166                         vtarg->inst_destbasereg = X86_ESP;
4167                         vtarg->inst_offset = inst->stack_usage;
4168                         vtarg->sreg1 = vt_reg;
4169                         mono_bblock_add_inst (cfg->cbb, vtarg);
4170                 }
4171                 else if (cinfo->ret.storage == ArgInIReg) {
4172                         /* The return address is passed in a register */
4173                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
4174                         vtarg->sreg1 = vt_reg;
4175                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
4176                         mono_bblock_add_inst (cfg->cbb, vtarg);
4177
4178                         mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
4179                 } else {
4180                         MonoInst *vtarg;
4181                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4182                         vtarg->type = STACK_MP;
4183                         vtarg->sreg1 = vt_reg;
4184                         mono_bblock_add_inst (cfg->cbb, vtarg);
4185                 }
4186         }
4187 }
4188
4189 #ifdef MONO_ARCH_HAVE_IMT
4190
/*
 * Sizes in bytes of the instructions emitted into an IMT thunk, used to
 * compute how much code memory to reserve:
 *
 * Linear handler, the bsearch head compare is shorter
 * CMP_SIZE       [2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
 * BR_SMALL_SIZE  [1 + 1] x86_branch8(inst,cond,imm,is_signed)
 *                        x86_patch(ins,target)
 * BR_LARGE_SIZE  [2 + 4] x86_branch32(inst,cond,imm,is_signed)
 * JUMP_IMM_SIZE  [1 + 5] x86_jump_mem(inst,mem)
 *
 * A conditional branch with a 32 bit displacement is encoded as the two
 * opcode bytes 0x0f 0x8x followed by the displacement, so it needs 6 bytes.
 */

#define CMP_SIZE 6
#define BR_SMALL_SIZE 2
#define BR_LARGE_SIZE 6
#define JUMP_IMM_SIZE 6
#define ENABLE_WRONG_METHOD_CHECK 0
4202
4203 static int
4204 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
4205 {
4206         int i, distance = 0;
4207         for (i = start; i < target; ++i)
4208                 distance += imt_entries [i]->chunk_size;
4209         return distance;
4210 }
4211
4212 /*
4213  * LOCKING: called with the domain lock held
4214  */
4215 gpointer
4216 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count)
4217 {
4218         int i;
4219         int size = 0;
4220         guint8 *code, *start;
4221
4222         for (i = 0; i < count; ++i) {
4223                 MonoIMTCheckItem *item = imt_entries [i];
4224                 if (item->is_equals) {
4225                         if (item->check_target_idx) {
4226                                 if (!item->compare_done)
4227                                         item->chunk_size += CMP_SIZE;
4228                                 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
4229                         } else {
4230                                 item->chunk_size += JUMP_IMM_SIZE;
4231 #if ENABLE_WRONG_METHOD_CHECK
4232                                 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
4233 #endif
4234                         }
4235                 } else {
4236                         item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
4237                         imt_entries [item->check_target_idx]->compare_done = TRUE;
4238                 }
4239                 size += item->chunk_size;
4240         }
4241         code = mono_code_manager_reserve (domain->code_mp, size);
4242         start = code;
4243         for (i = 0; i < count; ++i) {
4244                 MonoIMTCheckItem *item = imt_entries [i];
4245                 item->code_target = code;
4246                 if (item->is_equals) {
4247                         if (item->check_target_idx) {
4248                                 if (!item->compare_done)
4249                                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4250                                 item->jmp_code = code;
4251                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);
4252                                 x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
4253                         } else {
4254                                 /* enable the commented code to assert on wrong method */
4255 #if ENABLE_WRONG_METHOD_CHECK
4256                                 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4257                                 item->jmp_code = code;
4258                                 x86_branch8 (code, X86_CC_NE, 0, FALSE);
4259 #endif
4260                                 x86_jump_mem (code, & (vtable->vtable [item->vtable_slot]));
4261 #if ENABLE_WRONG_METHOD_CHECK
4262                                 x86_patch (item->jmp_code, code);
4263                                 x86_breakpoint (code);
4264                                 item->jmp_code = NULL;
4265 #endif
4266                         }
4267                 } else {
4268                         x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->method);
4269                         item->jmp_code = code;
4270                         if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
4271                                 x86_branch8 (code, X86_CC_GE, 0, FALSE);
4272                         else
4273                                 x86_branch32 (code, X86_CC_GE, 0, FALSE);
4274                 }
4275         }
4276         /* patch the branches to get to the target items */
4277         for (i = 0; i < count; ++i) {
4278                 MonoIMTCheckItem *item = imt_entries [i];
4279                 if (item->jmp_code) {
4280                         if (item->check_target_idx) {
4281                                 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
4282                         }
4283                 }
4284         }
4285                 
4286         mono_stats.imt_thunks_size += code - start;
4287         g_assert (code - start <= size);
4288         return start;
4289 }
4290
4291 MonoMethod*
4292 mono_arch_find_imt_method (gpointer *regs, guint8 *code)
4293 {
4294         return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
4295 }
4296
4297 MonoObject*
4298 mono_arch_find_this_argument (gpointer *regs, MonoMethod *method)
4299 {
4300         MonoMethodSignature *sig = mono_method_signature (method);
4301         CallInfo *cinfo = get_call_info (NULL, NULL, sig, FALSE);
4302         int this_argument_offset;
4303         MonoObject *this_argument;
4304
4305         /* 
4306          * this is the offset of the this arg from esp as saved at the start of 
4307          * mono_arch_create_trampoline_code () in tramp-x86.c.
4308          */
4309         this_argument_offset = 5;
4310         if (MONO_TYPE_ISSTRUCT (sig->ret) && (cinfo->ret.storage == ArgOnStack))
4311                 this_argument_offset++;
4312
4313         this_argument = * (MonoObject**) (((guint8*) regs [X86_ESP]) + this_argument_offset * sizeof (gpointer));
4314
4315         g_free (cinfo);
4316         return this_argument;
4317 }
4318 #endif
4319
4320 MonoInst*
4321 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4322 {
4323         MonoInst *ins = NULL;
4324
4325         if (cmethod->klass == mono_defaults.math_class) {
4326                 if (strcmp (cmethod->name, "Sin") == 0) {
4327                         MONO_INST_NEW (cfg, ins, OP_SIN);
4328                         ins->inst_i0 = args [0];
4329                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4330                         MONO_INST_NEW (cfg, ins, OP_COS);
4331                         ins->inst_i0 = args [0];
4332                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4333                         MONO_INST_NEW (cfg, ins, OP_TAN);
4334                         ins->inst_i0 = args [0];
4335                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4336                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4337                         ins->inst_i0 = args [0];
4338                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4339                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4340                         ins->inst_i0 = args [0];
4341                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4342                         MONO_INST_NEW (cfg, ins, OP_ABS);
4343                         ins->inst_i0 = args [0];
4344                 }
4345 #if 0
4346                 /* OP_FREM is not IEEE compatible */
4347                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4348                         MONO_INST_NEW (cfg, ins, OP_FREM);
4349                         ins->inst_i0 = args [0];
4350                         ins->inst_i1 = args [1];
4351                 }
4352 #endif
4353         } else if (cmethod->klass == mono_defaults.thread_class &&
4354                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
4355                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
4356         } else if(cmethod->klass->image == mono_defaults.corlib &&
4357                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4358                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4359
4360                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4361                         MonoInst *ins_iconst;
4362
4363                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4364                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4365                         ins_iconst->inst_c0 = 1;
4366
4367                         ins->inst_i0 = args [0];
4368                         ins->inst_i1 = ins_iconst;
4369                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4370                         MonoInst *ins_iconst;
4371
4372                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4373                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4374                         ins_iconst->inst_c0 = -1;
4375
4376                         ins->inst_i0 = args [0];
4377                         ins->inst_i1 = ins_iconst;
4378                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4379                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4380
4381                         ins->inst_i0 = args [0];
4382                         ins->inst_i1 = args [1];
4383                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4384                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4385
4386                         ins->inst_i0 = args [0];
4387                         ins->inst_i1 = args [1];
4388                 }
4389         }
4390
4391         return ins;
4392 }
4393
4394
4395 gboolean
4396 mono_arch_print_tree (MonoInst *tree, int arity)
4397 {
4398         return 0;
4399 }
4400
4401 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4402 {
4403         MonoInst* ins;
4404         
4405         if (appdomain_tls_offset == -1)
4406                 return NULL;
4407
4408         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4409         ins->inst_offset = appdomain_tls_offset;
4410         return ins;
4411 }
4412
4413 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4414 {
4415         MonoInst* ins;
4416
4417         if (thread_tls_offset == -1)
4418                 return NULL;
4419
4420         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4421         ins->inst_offset = thread_tls_offset;
4422         return ins;
4423 }
4424
4425 guint32
4426 mono_arch_get_patch_offset (guint8 *code)
4427 {
4428         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4429                 return 2;
4430         else if ((code [0] == 0xba))
4431                 return 1;
4432         else if ((code [0] == 0x68))
4433                 /* push IMM */
4434                 return 1;
4435         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4436                 /* push <OFFSET>(<REG>) */
4437                 return 2;
4438         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4439                 /* call *<OFFSET>(<REG>) */
4440                 return 2;
4441         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4442                 /* fldl <ADDR> */
4443                 return 2;
4444         else if ((code [0] == 0x58) && (code [1] == 0x05))
4445                 /* pop %eax; add <OFFSET>, %eax */
4446                 return 2;
4447         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4448                 /* pop <REG>; add <OFFSET>, <REG> */
4449                 return 3;
4450         else {
4451                 g_assert_not_reached ();
4452                 return -1;
4453         }
4454 }
4455
4456 gboolean
4457 mono_breakpoint_clean_code (guint8 *code, guint8 *buf, int size)
4458 {
4459         int i;
4460         gboolean can_write = TRUE;
4461         memcpy (buf, code, size);
4462         for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
4463                 int idx = mono_breakpoint_info_index [i];
4464                 guint8 *ptr;
4465                 if (idx < 1)
4466                         continue;
4467                 ptr = mono_breakpoint_info [idx].address;
4468                 if (ptr >= code && ptr < code + size) {
4469                         guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
4470                         can_write = FALSE;
4471                         /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
4472                         buf [ptr - code] = saved_byte;
4473                 }
4474         }
4475         return can_write;
4476 }
4477
4478 gpointer
4479 mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
4480 {
4481         guint8 buf [8];
4482         guint8 reg = 0;
4483         gint32 disp = 0;
4484
4485         mono_breakpoint_clean_code (code - 8, buf, sizeof (buf));
4486         code = buf + 8;
4487
4488         *displacement = 0;
4489
4490         /* go to the start of the call instruction
4491          *
4492          * address_byte = (m << 6) | (o << 3) | reg
4493          * call opcode: 0xff address_byte displacement
4494          * 0xff m=1,o=2 imm8
4495          * 0xff m=2,o=2 imm32
4496          */
4497         code -= 6;
4498
4499         /* 
4500          * A given byte sequence can match more than case here, so we have to be
4501          * really careful about the ordering of the cases. Longer sequences
4502          * come first.
4503          */
4504         if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
4505                 /*
4506                  * This is an interface call
4507                  * 8b 80 0c e8 ff ff       mov    0xffffe80c(%eax),%eax
4508                  * ff 10                   call   *(%eax)
4509                  */
4510                 reg = x86_modrm_rm (code [5]);
4511                 disp = 0;
4512 #ifdef MONO_ARCH_HAVE_IMT
4513         } else if ((code [-2] == 0xba) && (code [3] == 0xff) && (x86_modrm_mod (code [4]) == 1) && (x86_modrm_reg (code [4]) == 2) && ((signed char)code [5] < 0)) {
4514                 /* IMT-based interface calls: with MONO_ARCH_IMT_REG == edx
4515                  * ba 14 f8 28 08          mov    $0x828f814,%edx
4516                  * ff 50 fc                call   *0xfffffffc(%eax)
4517                  */
4518                 reg = code [4] & 0x07;
4519                 disp = (signed char)code [5];
4520 #endif
4521         } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
4522                 reg = code [4] & 0x07;
4523                 disp = (signed char)code [5];
4524         } else {
4525                 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
4526                         reg = code [1] & 0x07;
4527                         disp = *((gint32*)(code + 2));
4528                 } else if ((code [1] == 0xe8)) {
4529                         return NULL;
4530                 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
4531                         /*
4532                          * This is a interface call
4533                          * 8b 40 30   mov    0x30(%eax),%eax
4534                          * ff 10      call   *(%eax)
4535                          */
4536                         disp = 0;
4537                         reg = code [5] & 0x07;
4538                 }
4539                 else
4540                         return NULL;
4541         }
4542
4543         *displacement = disp;
4544         return regs [reg];
4545 }
4546
4547 gpointer*
4548 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
4549 {
4550         gpointer vt;
4551         int displacement;
4552         vt = mono_arch_get_vcall_slot (code, regs, &displacement);
4553         if (!vt)
4554                 return NULL;
4555         return (gpointer*)((char*)vt + displacement);
4556 }
4557
4558 gpointer
4559 mono_arch_get_this_arg_from_call (MonoMethodSignature *sig, gssize *regs, guint8 *code)
4560 {
4561         guint32 esp = regs [X86_ESP];
4562         CallInfo *cinfo;
4563         gpointer res;
4564
4565         cinfo = get_call_info (NULL, NULL, sig, FALSE);
4566
4567         /*
4568          * The stack looks like:
4569          * <other args>
4570          * <this=delegate>
4571          * <possible vtype return address>
4572          * <return addr>
4573          * <4 pointers pushed by mono_arch_create_trampoline_code ()>
4574          */
4575         res = (((MonoObject**)esp) [5 + (cinfo->args [0].offset / 4)]);
4576         g_free (cinfo);
4577         return res;
4578 }
4579
4580 #define MAX_ARCH_DELEGATE_PARAMS 10
4581
4582 gpointer
4583 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
4584 {
4585         guint8 *code, *start;
4586
4587         if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
4588                 return NULL;
4589
4590         /* FIXME: Support more cases */
4591         if (MONO_TYPE_ISSTRUCT (sig->ret))
4592                 return NULL;
4593
4594         /*
4595          * The stack contains:
4596          * <delegate>
4597          * <return addr>
4598          */
4599
4600         if (has_target) {
4601                 static guint8* cached = NULL;
4602                 mono_mini_arch_lock ();
4603                 if (cached) {
4604                         mono_mini_arch_unlock ();
4605                         return cached;
4606                 }
4607                 
4608                 start = code = mono_global_codeman_reserve (64);
4609
4610                 /* Replace the this argument with the target */
4611                 x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
4612                 x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
4613                 x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
4614                 x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4615
4616                 g_assert ((code - start) < 64);
4617
4618                 cached = start;
4619
4620                 mono_mini_arch_unlock ();
4621         } else {
4622                 static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
4623                 int i = 0;
4624                 /* 8 for mov_reg and jump, plus 8 for each parameter */
4625                 int code_reserve = 8 + (sig->param_count * 8);
4626
4627                 for (i = 0; i < sig->param_count; ++i)
4628                         if (!mono_is_regsize_var (sig->params [i]))
4629                                 return NULL;
4630
4631                 mono_mini_arch_lock ();
4632                 code = cache [sig->param_count];
4633                 if (code) {
4634                         mono_mini_arch_unlock ();
4635                         return code;
4636                 }
4637
4638                 /*
4639                  * The stack contains:
4640                  * <args in reverse order>
4641                  * <delegate>
4642                  * <return addr>
4643                  *
4644                  * and we need:
4645                  * <args in reverse order>
4646                  * <return addr>
4647                  * 
4648                  * without unbalancing the stack.
4649                  * So move each arg up a spot in the stack (overwriting un-needed 'this' arg)
4650                  * and leaving original spot of first arg as placeholder in stack so
4651                  * when callee pops stack everything works.
4652                  */
4653
4654                 start = code = mono_global_codeman_reserve (code_reserve);
4655
4656                 /* store delegate for access to method_ptr */
4657                 x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);
4658
4659                 /* move args up */
4660                 for (i = 0; i < sig->param_count; ++i) {
4661                         x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
4662                         x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
4663                 }
4664
4665                 x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
4666
4667                 g_assert ((code - start) < code_reserve);
4668
4669                 cache [sig->param_count] = start;
4670
4671                 mono_mini_arch_unlock ();
4672         }
4673
4674         return start;
4675 }