2005-12-12 Mike Kestner <mkestner@novell.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #include <mono/metadata/appdomain.h>
16 #include <mono/metadata/debug-helpers.h>
17 #include <mono/metadata/threads.h>
18 #include <mono/metadata/profiler-private.h>
19 #include <mono/utils/mono-math.h>
20
21 #include "trace.h"
22 #include "mini-x86.h"
23 #include "inssel.h"
24 #include "cpu-pentium.h"
25
26 /* On windows, these hold the key returned by TlsAlloc () */
27 static gint lmf_tls_offset = -1;
28 static gint appdomain_tls_offset = -1;
29 static gint thread_tls_offset = -1;
30
31 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
32
33 #define ARGS_OFFSET 8
34
35 #ifdef PLATFORM_WIN32
36 /* Under windows, the default pinvoke calling convention is stdcall */
37 #define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
38 #else
39 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
40 #endif
41
42 #define NOT_IMPLEMENTED g_assert_not_reached ()
43
44 const char*
45 mono_arch_regname (int reg) {
46         switch (reg) {
47         case X86_EAX: return "%eax";
48         case X86_EBX: return "%ebx";
49         case X86_ECX: return "%ecx";
50         case X86_EDX: return "%edx";
51         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
52         case X86_EDI: return "%edi";
53         case X86_ESI: return "%esi";
54         }
55         return "unknown";
56 }
57
58 const char*
59 mono_arch_fregname (int reg) {
60         return "unknown";
61 }
62
63 typedef enum {
64         ArgInIReg,
65         ArgInFloatSSEReg,
66         ArgInDoubleSSEReg,
67         ArgOnStack,
68         ArgValuetypeInReg,
69         ArgOnFloatFpStack,
70         ArgOnDoubleFpStack,
71         ArgNone
72 } ArgStorage;
73
74 typedef struct {
75         gint16 offset;
76         gint8  reg;
77         ArgStorage storage;
78
79         /* Only if storage == ArgValuetypeInReg */
80         ArgStorage pair_storage [2];
81         gint8 pair_regs [2];
82 } ArgInfo;
83
84 typedef struct {
85         int nargs;
86         guint32 stack_usage;
87         guint32 reg_usage;
88         guint32 freg_usage;
89         gboolean need_stack_align;
90         ArgInfo ret;
91         ArgInfo sig_cookie;
92         ArgInfo args [1];
93 } CallInfo;
94
95 #define PARAM_REGS 0
96
97 #define FLOAT_PARAM_REGS 0
98
99 static X86_Reg_No param_regs [] = { 0 };
100
101 #ifdef PLATFORM_WIN32
102 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
103 #endif
104
105 static void inline
106 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
107 {
108     ainfo->offset = *stack_size;
109
110     if (*gr >= PARAM_REGS) {
111                 ainfo->storage = ArgOnStack;
112                 (*stack_size) += sizeof (gpointer);
113     }
114     else {
115                 ainfo->storage = ArgInIReg;
116                 ainfo->reg = param_regs [*gr];
117                 (*gr) ++;
118     }
119 }
120
121 static void inline
122 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
123 {
124         ainfo->offset = *stack_size;
125
126         g_assert (PARAM_REGS == 0);
127         
128         ainfo->storage = ArgOnStack;
129         (*stack_size) += sizeof (gpointer) * 2;
130 }
131
132 static void inline
133 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
134 {
135     ainfo->offset = *stack_size;
136
137     if (*gr >= FLOAT_PARAM_REGS) {
138                 ainfo->storage = ArgOnStack;
139                 (*stack_size) += is_double ? 8 : 4;
140     }
141     else {
142                 /* A double register */
143                 if (is_double)
144                         ainfo->storage = ArgInDoubleSSEReg;
145                 else
146                         ainfo->storage = ArgInFloatSSEReg;
147                 ainfo->reg = *gr;
148                 (*gr) += 1;
149     }
150 }
151
152
153 static void
154 add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
155                gboolean is_return,
156                guint32 *gr, guint32 *fr, guint32 *stack_size)
157 {
158         guint32 size;
159         MonoClass *klass;
160
161         klass = mono_class_from_mono_type (type);
162         if (sig->pinvoke) 
163                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
164         else 
165                 size = mono_type_stack_size (&klass->byval_arg, NULL);
166
167 #ifdef PLATFORM_WIN32
168         if (sig->pinvoke && is_return) {
169                 MonoMarshalType *info;
170
171                 /*
172                  * the exact rules are not very well documented, the code below seems to work with the 
173                  * code generated by gcc 3.3.3 -mno-cygwin.
174                  */
175                 info = mono_marshal_load_type_info (klass);
176                 g_assert (info);
177
178                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
179
180                 /* Special case structs with only a float member */
181                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
182                         ainfo->storage = ArgValuetypeInReg;
183                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
184                         return;
185                 }
186                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
187                         ainfo->storage = ArgValuetypeInReg;
188                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
189                         return;
190                 }               
191                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
192                         ainfo->storage = ArgValuetypeInReg;
193                         ainfo->pair_storage [0] = ArgInIReg;
194                         ainfo->pair_regs [0] = return_regs [0];
195                         if (info->native_size > 4) {
196                                 ainfo->pair_storage [1] = ArgInIReg;
197                                 ainfo->pair_regs [1] = return_regs [1];
198                         }
199                         return;
200                 }
201         }
202 #endif
203
204         ainfo->offset = *stack_size;
205         ainfo->storage = ArgOnStack;
206         *stack_size += ALIGN_TO (size, sizeof (gpointer));
207 }
208
209 /*
210  * get_call_info:
211  *
212  *  Obtain information about a call according to the calling convention.
213  * For x86 ELF, see the "System V Application Binary Interface Intel386 
214  * Architecture Processor Supplment, Fourth Edition" document for more
215  * information.
216  * For x86 win32, see ???.
217  */
218 static CallInfo*
219 get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
220 {
221         guint32 i, gr, fr;
222         MonoType *ret_type;
223         int n = sig->hasthis + sig->param_count;
224         guint32 stack_size = 0;
225         CallInfo *cinfo;
226
227         cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
228
229         gr = 0;
230         fr = 0;
231
232         /* return value */
233         {
234                 ret_type = mono_type_get_underlying_type (sig->ret);
235                 switch (ret_type->type) {
236                 case MONO_TYPE_BOOLEAN:
237                 case MONO_TYPE_I1:
238                 case MONO_TYPE_U1:
239                 case MONO_TYPE_I2:
240                 case MONO_TYPE_U2:
241                 case MONO_TYPE_CHAR:
242                 case MONO_TYPE_I4:
243                 case MONO_TYPE_U4:
244                 case MONO_TYPE_I:
245                 case MONO_TYPE_U:
246                 case MONO_TYPE_PTR:
247                 case MONO_TYPE_FNPTR:
248                 case MONO_TYPE_CLASS:
249                 case MONO_TYPE_OBJECT:
250                 case MONO_TYPE_SZARRAY:
251                 case MONO_TYPE_ARRAY:
252                 case MONO_TYPE_STRING:
253                         cinfo->ret.storage = ArgInIReg;
254                         cinfo->ret.reg = X86_EAX;
255                         break;
256                 case MONO_TYPE_U8:
257                 case MONO_TYPE_I8:
258                         cinfo->ret.storage = ArgInIReg;
259                         cinfo->ret.reg = X86_EAX;
260                         break;
261                 case MONO_TYPE_R4:
262                         cinfo->ret.storage = ArgOnFloatFpStack;
263                         break;
264                 case MONO_TYPE_R8:
265                         cinfo->ret.storage = ArgOnDoubleFpStack;
266                         break;
267                 case MONO_TYPE_VALUETYPE: {
268                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
269
270                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
271                         if (cinfo->ret.storage == ArgOnStack)
272                                 /* The caller passes the address where the value is stored */
273                                 add_general (&gr, &stack_size, &cinfo->ret);
274                         break;
275                 }
276                 case MONO_TYPE_TYPEDBYREF:
277                         /* Same as a valuetype with size 24 */
278                         add_general (&gr, &stack_size, &cinfo->ret);
279                         ;
280                         break;
281                 case MONO_TYPE_VOID:
282                         cinfo->ret.storage = ArgNone;
283                         break;
284                 default:
285                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
286                 }
287         }
288
289         /* this */
290         if (sig->hasthis)
291                 add_general (&gr, &stack_size, cinfo->args + 0);
292
293         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
294                 gr = PARAM_REGS;
295                 fr = FLOAT_PARAM_REGS;
296                 
297                 /* Emit the signature cookie just before the implicit arguments */
298                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
299         }
300
301         for (i = 0; i < sig->param_count; ++i) {
302                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
303                 MonoType *ptype;
304
305                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
306                         /* We allways pass the sig cookie on the stack for simplicity */
307                         /* 
308                          * Prevent implicit arguments + the sig cookie from being passed 
309                          * in registers.
310                          */
311                         gr = PARAM_REGS;
312                         fr = FLOAT_PARAM_REGS;
313
314                         /* Emit the signature cookie just before the implicit arguments */
315                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
316                 }
317
318                 if (sig->params [i]->byref) {
319                         add_general (&gr, &stack_size, ainfo);
320                         continue;
321                 }
322                 ptype = mono_type_get_underlying_type (sig->params [i]);
323                 switch (ptype->type) {
324                 case MONO_TYPE_BOOLEAN:
325                 case MONO_TYPE_I1:
326                 case MONO_TYPE_U1:
327                         add_general (&gr, &stack_size, ainfo);
328                         break;
329                 case MONO_TYPE_I2:
330                 case MONO_TYPE_U2:
331                 case MONO_TYPE_CHAR:
332                         add_general (&gr, &stack_size, ainfo);
333                         break;
334                 case MONO_TYPE_I4:
335                 case MONO_TYPE_U4:
336                         add_general (&gr, &stack_size, ainfo);
337                         break;
338                 case MONO_TYPE_I:
339                 case MONO_TYPE_U:
340                 case MONO_TYPE_PTR:
341                 case MONO_TYPE_FNPTR:
342                 case MONO_TYPE_CLASS:
343                 case MONO_TYPE_OBJECT:
344                 case MONO_TYPE_STRING:
345                 case MONO_TYPE_SZARRAY:
346                 case MONO_TYPE_ARRAY:
347                         add_general (&gr, &stack_size, ainfo);
348                         break;
349                 case MONO_TYPE_VALUETYPE:
350                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
351                         break;
352                 case MONO_TYPE_TYPEDBYREF:
353                         stack_size += sizeof (MonoTypedRef);
354                         ainfo->storage = ArgOnStack;
355                         break;
356                 case MONO_TYPE_U8:
357                 case MONO_TYPE_I8:
358                         add_general_pair (&gr, &stack_size, ainfo);
359                         break;
360                 case MONO_TYPE_R4:
361                         add_float (&fr, &stack_size, ainfo, FALSE);
362                         break;
363                 case MONO_TYPE_R8:
364                         add_float (&fr, &stack_size, ainfo, TRUE);
365                         break;
366                 default:
367                         g_error ("unexpected type 0x%x", ptype->type);
368                         g_assert_not_reached ();
369                 }
370         }
371
372         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
373                 gr = PARAM_REGS;
374                 fr = FLOAT_PARAM_REGS;
375                 
376                 /* Emit the signature cookie just before the implicit arguments */
377                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
378         }
379
380         cinfo->stack_usage = stack_size;
381         cinfo->reg_usage = gr;
382         cinfo->freg_usage = fr;
383         return cinfo;
384 }
385
386 /*
387  * mono_arch_get_argument_info:
388  * @csig:  a method signature
389  * @param_count: the number of parameters to consider
390  * @arg_info: an array to store the result infos
391  *
392  * Gathers information on parameters such as size, alignment and
393  * padding. arg_info should be large enought to hold param_count + 1 entries. 
394  *
395  * Returns the size of the activation frame.
396  */
397 int
398 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
399 {
400         int k, frame_size = 0;
401         int size, align, pad;
402         int offset = 8;
403         CallInfo *cinfo;
404
405         cinfo = get_call_info (csig, FALSE);
406
407         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
408                 frame_size += sizeof (gpointer);
409                 offset += 4;
410         }
411
412         arg_info [0].offset = offset;
413
414         if (csig->hasthis) {
415                 frame_size += sizeof (gpointer);
416                 offset += 4;
417         }
418
419         arg_info [0].size = frame_size;
420
421         for (k = 0; k < param_count; k++) {
422                 
423                 if (csig->pinvoke)
424                         size = mono_type_native_stack_size (csig->params [k], &align);
425                 else
426                         size = mono_type_stack_size (csig->params [k], &align);
427
428                 /* ignore alignment for now */
429                 align = 1;
430
431                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
432                 arg_info [k].pad = pad;
433                 frame_size += size;
434                 arg_info [k + 1].pad = 0;
435                 arg_info [k + 1].size = size;
436                 offset += pad;
437                 arg_info [k + 1].offset = offset;
438                 offset += size;
439         }
440
441         align = MONO_ARCH_FRAME_ALIGNMENT;
442         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
443         arg_info [k].pad = pad;
444
445         g_free (cinfo);
446
447         return frame_size;
448 }
449
450 static const guchar cpuid_impl [] = {
451         0x55,                           /* push   %ebp */
452         0x89, 0xe5,                     /* mov    %esp,%ebp */
453         0x53,                           /* push   %ebx */
454         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
455         0x0f, 0xa2,                     /* cpuid   */
456         0x50,                           /* push   %eax */
457         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
458         0x89, 0x18,                     /* mov    %ebx,(%eax) */
459         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
460         0x89, 0x08,                     /* mov    %ecx,(%eax) */
461         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
462         0x89, 0x10,                     /* mov    %edx,(%eax) */
463         0x58,                           /* pop    %eax */
464         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
465         0x89, 0x02,                     /* mov    %eax,(%edx) */
466         0x5b,                           /* pop    %ebx */
467         0xc9,                           /* leave   */
468         0xc3,                           /* ret     */
469 };
470
471 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
472
473 static int 
474 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
475 {
476         int have_cpuid = 0;
477 #ifndef _MSC_VER
478         __asm__  __volatile__ (
479                 "pushfl\n"
480                 "popl %%eax\n"
481                 "movl %%eax, %%edx\n"
482                 "xorl $0x200000, %%eax\n"
483                 "pushl %%eax\n"
484                 "popfl\n"
485                 "pushfl\n"
486                 "popl %%eax\n"
487                 "xorl %%edx, %%eax\n"
488                 "andl $0x200000, %%eax\n"
489                 "movl %%eax, %0"
490                 : "=r" (have_cpuid)
491                 :
492                 : "%eax", "%edx"
493         );
494 #else
495         __asm {
496                 pushfd
497                 pop eax
498                 mov edx, eax
499                 xor eax, 0x200000
500                 push eax
501                 popfd
502                 pushfd
503                 pop eax
504                 xor eax, edx
505                 and eax, 0x200000
506                 mov have_cpuid, eax
507         }
508 #endif
509         if (have_cpuid) {
510                 /* Have to use the code manager to get around WinXP DEP */
511                 MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
512                 CpuidFunc func;
513                 void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
514                 memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
515
516                 func = (CpuidFunc)ptr;
517                 func (id, p_eax, p_ebx, p_ecx, p_edx);
518
519                 mono_code_manager_destroy (codeman);
520
521                 /*
522                  * We use this approach because of issues with gcc and pic code, see:
523                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
524                 __asm__ __volatile__ ("cpuid"
525                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
526                         : "a" (id));
527                 */
528                 return 1;
529         }
530         return 0;
531 }
532
533 /*
534  * Initialize the cpu to execute managed code.
535  */
536 void
537 mono_arch_cpu_init (void)
538 {
539         /* spec compliance requires running with double precision */
540 #ifndef _MSC_VER
541         guint16 fpcw;
542
543         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
544         fpcw &= ~X86_FPCW_PRECC_MASK;
545         fpcw |= X86_FPCW_PREC_DOUBLE;
546         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
547         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
548 #else
549         _control87 (_PC_53, MCW_PC);
550 #endif
551 }
552
553 /*
554  * This function returns the optimizations supported on this cpu.
555  */
556 guint32
557 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
558 {
559         int eax, ebx, ecx, edx;
560         guint32 opts = 0;
561         
562         *exclude_mask = 0;
563         /* Feature Flags function, flags returned in EDX. */
564         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
565                 if (edx & (1 << 15)) {
566                         opts |= MONO_OPT_CMOV;
567                         if (edx & 1)
568                                 opts |= MONO_OPT_FCMOV;
569                         else
570                                 *exclude_mask |= MONO_OPT_FCMOV;
571                 } else
572                         *exclude_mask |= MONO_OPT_CMOV;
573         }
574         return opts;
575 }
576
577 /*
578  * Determine whenever the trap whose info is in SIGINFO is caused by
579  * integer overflow.
580  */
581 gboolean
582 mono_arch_is_int_overflow (void *sigctx, void *info)
583 {
584         MonoContext ctx;
585         guint8* ip;
586
587         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
588
589         ip = (guint8*)ctx.eip;
590
591         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
592                 gint32 reg;
593
594                 /* idiv REG */
595                 switch (x86_modrm_rm (ip [1])) {
596                 case X86_EAX:
597                         reg = ctx.eax;
598                         break;
599                 case X86_ECX:
600                         reg = ctx.ecx;
601                         break;
602                 case X86_EDX:
603                         reg = ctx.edx;
604                         break;
605                 case X86_EBX:
606                         reg = ctx.ebx;
607                         break;
608                 case X86_ESI:
609                         reg = ctx.esi;
610                         break;
611                 case X86_EDI:
612                         reg = ctx.edi;
613                         break;
614                 default:
615                         g_assert_not_reached ();
616                         reg = -1;
617                 }
618
619                 if (reg == -1)
620                         return TRUE;
621         }
622                         
623         return FALSE;
624 }
625
626 static gboolean
627 is_regsize_var (MonoType *t) {
628         if (t->byref)
629                 return TRUE;
630         switch (mono_type_get_underlying_type (t)->type) {
631         case MONO_TYPE_I4:
632         case MONO_TYPE_U4:
633         case MONO_TYPE_I:
634         case MONO_TYPE_U:
635         case MONO_TYPE_PTR:
636         case MONO_TYPE_FNPTR:
637                 return TRUE;
638         case MONO_TYPE_OBJECT:
639         case MONO_TYPE_STRING:
640         case MONO_TYPE_CLASS:
641         case MONO_TYPE_SZARRAY:
642         case MONO_TYPE_ARRAY:
643                 return TRUE;
644         case MONO_TYPE_VALUETYPE:
645                 return FALSE;
646         }
647         return FALSE;
648 }
649
650 GList *
651 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
652 {
653         GList *vars = NULL;
654         int i;
655
656         for (i = 0; i < cfg->num_varinfo; i++) {
657                 MonoInst *ins = cfg->varinfo [i];
658                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
659
660                 /* unused vars */
661                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
662                         continue;
663
664                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
665                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
666                         continue;
667
668                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
669                  * 8bit quantities in caller saved registers on x86 */
670                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
671                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
672                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
673                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
674                         g_assert (i == vmv->idx);
675                         vars = g_list_prepend (vars, vmv);
676                 }
677         }
678
679         vars = mono_varlist_sort (cfg, vars, 0);
680
681         return vars;
682 }
683
684 GList *
685 mono_arch_get_global_int_regs (MonoCompile *cfg)
686 {
687         GList *regs = NULL;
688
689         /* we can use 3 registers for global allocation */
690         regs = g_list_prepend (regs, (gpointer)X86_EBX);
691         regs = g_list_prepend (regs, (gpointer)X86_ESI);
692         regs = g_list_prepend (regs, (gpointer)X86_EDI);
693
694         return regs;
695 }
696
697 /*
698  * mono_arch_regalloc_cost:
699  *
700  *  Return the cost, in number of memory references, of the action of 
701  * allocating the variable VMV into a register during global register
702  * allocation.
703  */
704 guint32
705 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
706 {
707         MonoInst *ins = cfg->varinfo [vmv->idx];
708
709         if (cfg->method->save_lmf)
710                 /* The register is already saved */
711                 return (ins->opcode == OP_ARG) ? 1 : 0;
712         else
713                 /* push+pop+possible load if it is an argument */
714                 return (ins->opcode == OP_ARG) ? 3 : 2;
715 }
716  
717 /*
718  * Set var information according to the calling convention. X86 version.
719  * The locals var stuff should most likely be split in another method.
720  */
721 void
722 mono_arch_allocate_vars (MonoCompile *cfg)
723 {
724         MonoMethodSignature *sig;
725         MonoMethodHeader *header;
726         MonoInst *inst;
727         guint32 locals_stack_size, locals_stack_align;
728         int i, offset;
729         gint32 *offsets;
730         CallInfo *cinfo;
731
732         header = mono_method_get_header (cfg->method);
733         sig = mono_method_signature (cfg->method);
734
735         cinfo = get_call_info (sig, FALSE);
736
737         cfg->frame_reg = MONO_ARCH_BASEREG;
738         offset = 0;
739
740         /* Reserve space to save LMF and caller saved registers */
741
742         if (cfg->method->save_lmf) {
743                 offset += sizeof (MonoLMF);
744         } else {
745                 if (cfg->used_int_regs & (1 << X86_EBX)) {
746                         offset += 4;
747                 }
748
749                 if (cfg->used_int_regs & (1 << X86_EDI)) {
750                         offset += 4;
751                 }
752
753                 if (cfg->used_int_regs & (1 << X86_ESI)) {
754                         offset += 4;
755                 }
756         }
757
758         switch (cinfo->ret.storage) {
759         case ArgValuetypeInReg:
760                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
761                 offset += 8;
762                 cfg->ret->opcode = OP_REGOFFSET;
763                 cfg->ret->inst_basereg = X86_EBP;
764                 cfg->ret->inst_offset = - offset;
765                 break;
766         default:
767                 break;
768         }
769
770         /* Allocate locals */
771         offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
772         if (locals_stack_align) {
773                 offset += (locals_stack_align - 1);
774                 offset &= ~(locals_stack_align - 1);
775         }
776         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
777                 if (offsets [i] != -1) {
778                         MonoInst *inst = cfg->varinfo [i];
779                         inst->opcode = OP_REGOFFSET;
780                         inst->inst_basereg = X86_EBP;
781                         inst->inst_offset = - (offset + offsets [i]);
782                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
783                 }
784         }
785         g_free (offsets);
786         offset += locals_stack_size;
787
788
789         /*
790          * Allocate arguments+return value
791          */
792
793         switch (cinfo->ret.storage) {
794         case ArgOnStack:
795                 cfg->ret->opcode = OP_REGOFFSET;
796                 cfg->ret->inst_basereg = X86_EBP;
797                 cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
798                 break;
799         case ArgValuetypeInReg:
800                 break;
801         case ArgInIReg:
802                 cfg->ret->opcode = OP_REGVAR;
803                 cfg->ret->inst_c0 = cinfo->ret.reg;
804                 break;
805         case ArgNone:
806         case ArgOnFloatFpStack:
807         case ArgOnDoubleFpStack:
808                 break;
809         default:
810                 g_assert_not_reached ();
811         }
812
813         if (sig->call_convention == MONO_CALL_VARARG) {
814                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
815                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
816         }
817
818         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
819                 ArgInfo *ainfo = &cinfo->args [i];
820                 inst = cfg->varinfo [i];
821                 if (inst->opcode != OP_REGVAR) {
822                         inst->opcode = OP_REGOFFSET;
823                         inst->inst_basereg = X86_EBP;
824                 }
825                 inst->inst_offset = ainfo->offset + ARGS_OFFSET;
826         }
827
828         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
829         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
830
831         cfg->stack_offset = offset;
832
833         g_free (cinfo);
834 }
835
836 void
837 mono_arch_create_vars (MonoCompile *cfg)
838 {
839         MonoMethodSignature *sig;
840         CallInfo *cinfo;
841
842         sig = mono_method_signature (cfg->method);
843
844         cinfo = get_call_info (sig, FALSE);
845
846         if (cinfo->ret.storage == ArgValuetypeInReg)
847                 cfg->ret_var_is_local = TRUE;
848
849         g_free (cinfo);
850 }
851
852 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
853  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
854  */
855
856 /* 
857  * take the arguments and generate the arch-specific
858  * instructions to properly call the function in call.
859  * This includes pushing, moving arguments to the right register
860  * etc.
861  */
862 MonoCallInst*
863 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
864         MonoInst *arg, *in;
865         MonoMethodSignature *sig;
866         int i, n;
867         CallInfo *cinfo;
868         int sentinelpos;
869
870         sig = call->signature;
871         n = sig->param_count + sig->hasthis;
872
873         cinfo = get_call_info (sig, FALSE);
874
875         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
876                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
877
878         for (i = 0; i < n; ++i) {
879                 ArgInfo *ainfo = cinfo->args + i;
880
881                 /* Emit the signature cookie just before the implicit arguments */
882                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
883                         MonoMethodSignature *tmp_sig;
884                         MonoInst *sig_arg;
885
886                         /* FIXME: Add support for signature tokens to AOT */
887                         cfg->disable_aot = TRUE;
888                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
889
890                         /*
891                          * mono_ArgIterator_Setup assumes the signature cookie is 
892                          * passed first and all the arguments which were before it are
893                          * passed on the stack after the signature. So compensate by 
894                          * passing a different signature.
895                          */
896                         tmp_sig = mono_metadata_signature_dup (call->signature);
897                         tmp_sig->param_count -= call->signature->sentinelpos;
898                         tmp_sig->sentinelpos = 0;
899                         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
900
901                         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
902                         sig_arg->inst_p0 = tmp_sig;
903
904                         arg->inst_left = sig_arg;
905                         arg->type = STACK_PTR;
906                         /* prepend, so they get reversed */
907                         arg->next = call->out_args;
908                         call->out_args = arg;
909                 }
910
911                 if (is_virtual && i == 0) {
912                         /* the argument will be attached to the call instrucion */
913                         in = call->args [i];
914                 } else {
915                         MonoType *t;
916
917                         if (i >= sig->hasthis)
918                                 t = sig->params [i - sig->hasthis];
919                         else
920                                 t = &mono_defaults.int_class->byval_arg;
921                         t = mono_type_get_underlying_type (t);
922
923                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
924                         in = call->args [i];
925                         arg->cil_code = in->cil_code;
926                         arg->inst_left = in;
927                         arg->type = in->type;
928                         /* prepend, so they get reversed */
929                         arg->next = call->out_args;
930                         call->out_args = arg;
931
932                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
933                                 gint align;
934                                 guint32 size;
935
936                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
937                                         size = sizeof (MonoTypedRef);
938                                         align = sizeof (gpointer);
939                                 }
940                                 else
941                                         if (sig->pinvoke)
942                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
943                                         else
944                                                 size = mono_type_stack_size (&in->klass->byval_arg, &align);
945                                 arg->opcode = OP_OUTARG_VT;
946                                 arg->klass = in->klass;
947                                 arg->unused = sig->pinvoke;
948                                 arg->inst_imm = size; 
949                         }
950                         else {
951                                 switch (ainfo->storage) {
952                                 case ArgOnStack:
953                                         arg->opcode = OP_OUTARG;
954                                         if (!t->byref) {
955                                                 if (t->type == MONO_TYPE_R4)
956                                                         arg->opcode = OP_OUTARG_R4;
957                                                 else
958                                                         if (t->type == MONO_TYPE_R8)
959                                                                 arg->opcode = OP_OUTARG_R8;
960                                         }
961                                         break;
962                                 default:
963                                         g_assert_not_reached ();
964                                 }
965                         }
966                 }
967         }
968
969         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
970                 if (cinfo->ret.storage == ArgValuetypeInReg) {
971                         MonoInst *zero_inst;
972                         /*
973                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
974                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
975                          * before calling the function. So we add a dummy instruction to represent pushing the 
976                          * struct return address to the stack. The return address will be saved to this stack slot 
977                          * by the code emitted in this_vret_args.
978                          */
979                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
980                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
981                         zero_inst->inst_p0 = 0;
982                         arg->inst_left = zero_inst;
983                         arg->type = STACK_PTR;
984                         /* prepend, so they get reversed */
985                         arg->next = call->out_args;
986                         call->out_args = arg;
987                 }
988                 else
989                         /* if the function returns a struct, the called method already does a ret $0x4 */
990                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
991                                 cinfo->stack_usage -= 4;
992         }
993
994         call->stack_usage = cinfo->stack_usage;
995         g_free (cinfo);
996
997         return call;
998 }
999
1000 /*
1001  * Allow tracing to work with this interface (with an optional argument)
1002  */
1003 void*
1004 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1005 {
1006         guchar *code = p;
1007
1008         /* if some args are passed in registers, we need to save them here */
1009         x86_push_reg (code, X86_EBP);
1010
1011         if (cfg->compile_aot) {
1012                 x86_push_imm (code, cfg->method);
1013                 x86_mov_reg_imm (code, X86_EAX, func);
1014                 x86_call_reg (code, X86_EAX);
1015         } else {
1016                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1017                 x86_push_imm (code, cfg->method);
1018                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1019                 x86_call_code (code, 0);
1020         }
1021         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1022
1023         return code;
1024 }
1025
1026 enum {
1027         SAVE_NONE,
1028         SAVE_STRUCT,
1029         SAVE_EAX,
1030         SAVE_EAX_EDX,
1031         SAVE_FP
1032 };
1033
1034 void*
1035 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1036 {
1037         guchar *code = p;
1038         int arg_size = 0, save_mode = SAVE_NONE;
1039         MonoMethod *method = cfg->method;
1040         
1041         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1042         case MONO_TYPE_VOID:
1043                 /* special case string .ctor icall */
1044                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1045                         save_mode = SAVE_EAX;
1046                 else
1047                         save_mode = SAVE_NONE;
1048                 break;
1049         case MONO_TYPE_I8:
1050         case MONO_TYPE_U8:
1051                 save_mode = SAVE_EAX_EDX;
1052                 break;
1053         case MONO_TYPE_R4:
1054         case MONO_TYPE_R8:
1055                 save_mode = SAVE_FP;
1056                 break;
1057         case MONO_TYPE_VALUETYPE:
1058                 save_mode = SAVE_STRUCT;
1059                 break;
1060         default:
1061                 save_mode = SAVE_EAX;
1062                 break;
1063         }
1064
1065         switch (save_mode) {
1066         case SAVE_EAX_EDX:
1067                 x86_push_reg (code, X86_EDX);
1068                 x86_push_reg (code, X86_EAX);
1069                 if (enable_arguments) {
1070                         x86_push_reg (code, X86_EDX);
1071                         x86_push_reg (code, X86_EAX);
1072                         arg_size = 8;
1073                 }
1074                 break;
1075         case SAVE_EAX:
1076                 x86_push_reg (code, X86_EAX);
1077                 if (enable_arguments) {
1078                         x86_push_reg (code, X86_EAX);
1079                         arg_size = 4;
1080                 }
1081                 break;
1082         case SAVE_FP:
1083                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1084                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1085                 if (enable_arguments) {
1086                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1087                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1088                         arg_size = 8;
1089                 }
1090                 break;
1091         case SAVE_STRUCT:
1092                 if (enable_arguments) {
1093                         x86_push_membase (code, X86_EBP, 8);
1094                         arg_size = 4;
1095                 }
1096                 break;
1097         case SAVE_NONE:
1098         default:
1099                 break;
1100         }
1101
1102         if (cfg->compile_aot) {
1103                 x86_push_imm (code, method);
1104                 x86_mov_reg_imm (code, X86_EAX, func);
1105                 x86_call_reg (code, X86_EAX);
1106         } else {
1107                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1108                 x86_push_imm (code, method);
1109                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1110                 x86_call_code (code, 0);
1111         }
1112         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1113
1114         switch (save_mode) {
1115         case SAVE_EAX_EDX:
1116                 x86_pop_reg (code, X86_EAX);
1117                 x86_pop_reg (code, X86_EDX);
1118                 break;
1119         case SAVE_EAX:
1120                 x86_pop_reg (code, X86_EAX);
1121                 break;
1122         case SAVE_FP:
1123                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1124                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1125                 break;
1126         case SAVE_NONE:
1127         default:
1128                 break;
1129         }
1130
1131         return code;
1132 }
1133
1134 #define EMIT_COND_BRANCH(ins,cond,sign) \
1135 if (ins->flags & MONO_INST_BRLABEL) { \
1136         if (ins->inst_i0->inst_c0) { \
1137                 x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
1138         } else { \
1139                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
1140                 if ((cfg->opt & MONO_OPT_BRANCH) && \
1141                     x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
1142                         x86_branch8 (code, cond, 0, sign); \
1143                 else \
1144                         x86_branch32 (code, cond, 0, sign); \
1145         } \
1146 } else { \
1147         if (ins->inst_true_bb->native_offset) { \
1148                 x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
1149         } else { \
1150                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
1151                 if ((cfg->opt & MONO_OPT_BRANCH) && \
1152                     x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
1153                         x86_branch8 (code, cond, 0, sign); \
1154                 else \
1155                         x86_branch32 (code, cond, 0, sign); \
1156         } \
1157 }
1158
1159 /*  
1160  *      Emit an exception if condition is fail and
1161  *  if possible do a directly branch to target 
1162  */
1163 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
1164         do {                                                        \
1165                 MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
1166                 if (tins == NULL) {                                                                             \
1167                         mono_add_patch_info (cfg, code - cfg->native_code,   \
1168                                         MONO_PATCH_INFO_EXC, exc_name);  \
1169                         x86_branch32 (code, cond, 0, signed);               \
1170                 } else {        \
1171                         EMIT_COND_BRANCH (tins, cond, signed);  \
1172                 }                       \
1173         } while (0); 
1174
1175 #define EMIT_FPCOMPARE(code) do { \
1176         x86_fcompp (code); \
1177         x86_fnstsw (code); \
1178 } while (0); 
1179
1180
1181 static guint8*
1182 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1183 {
1184         if (cfg->compile_aot) {
1185                 guint32 got_reg = X86_EAX;
1186
1187                 if (cfg->compile_aot) {          
1188                         /*
1189                          * Since the patches are generated by the back end, there is
1190                          * no way to generate a got_var at this point.
1191                          */
1192                         g_assert (cfg->got_var);
1193
1194                         if (cfg->got_var->opcode == OP_REGOFFSET)
1195                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1196                         else
1197                                 got_reg = cfg->got_var->dreg;
1198                 }
1199
1200                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1201                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1202         }
1203         else {
1204                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1205                 x86_call_code (code, 0);
1206         }
1207
1208         return code;
1209 }
1210
1211 /* FIXME: Add more instructions */
1212 #define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1213
1214 static void
1215 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1216 {
1217         MonoInst *ins, *last_ins = NULL;
1218         ins = bb->code;
1219
1220         while (ins) {
1221
1222                 switch (ins->opcode) {
1223                 case OP_ICONST:
1224                         /* reg = 0 -> XOR (reg, reg) */
1225                         /* XOR sets cflags on x86, so we cant do it always */
1226                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1227                                 ins->opcode = CEE_XOR;
1228                                 ins->sreg1 = ins->dreg;
1229                                 ins->sreg2 = ins->dreg;
1230                         }
1231                         break;
1232                 case OP_MUL_IMM: 
1233                         /* remove unnecessary multiplication with 1 */
1234                         if (ins->inst_imm == 1) {
1235                                 if (ins->dreg != ins->sreg1) {
1236                                         ins->opcode = OP_MOVE;
1237                                 } else {
1238                                         last_ins->next = ins->next;
1239                                         ins = ins->next;
1240                                         continue;
1241                                 }
1242                         }
1243                         break;
1244                 case OP_COMPARE_IMM:
1245                         /* OP_COMPARE_IMM (reg, 0) 
1246                          * --> 
1247                          * OP_X86_TEST_NULL (reg) 
1248                          */
1249                         if (!ins->inst_imm)
1250                                 ins->opcode = OP_X86_TEST_NULL;
1251                         break;
1252                 case OP_X86_COMPARE_MEMBASE_IMM:
1253                         /* 
1254                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1255                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1256                          * -->
1257                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1258                          * OP_COMPARE_IMM reg, imm
1259                          *
1260                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1261                          */
1262                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1263                             ins->inst_basereg == last_ins->inst_destbasereg &&
1264                             ins->inst_offset == last_ins->inst_offset) {
1265                                         ins->opcode = OP_COMPARE_IMM;
1266                                         ins->sreg1 = last_ins->sreg1;
1267
1268                                         /* check if we can remove cmp reg,0 with test null */
1269                                         if (!ins->inst_imm)
1270                                                 ins->opcode = OP_X86_TEST_NULL;
1271                                 }
1272
1273                         break;
1274                 case OP_LOAD_MEMBASE:
1275                 case OP_LOADI4_MEMBASE:
1276                         /* 
1277                          * Note: if reg1 = reg2 the load op is removed
1278                          *
1279                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1280                          * OP_LOAD_MEMBASE offset(basereg), reg2
1281                          * -->
1282                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1283                          * OP_MOVE reg1, reg2
1284                          */
1285                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1286                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1287                             ins->inst_basereg == last_ins->inst_destbasereg &&
1288                             ins->inst_offset == last_ins->inst_offset) {
1289                                 if (ins->dreg == last_ins->sreg1) {
1290                                         last_ins->next = ins->next;                             
1291                                         ins = ins->next;                                
1292                                         continue;
1293                                 } else {
1294                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1295                                         ins->opcode = OP_MOVE;
1296                                         ins->sreg1 = last_ins->sreg1;
1297                                 }
1298
1299                         /* 
1300                          * Note: reg1 must be different from the basereg in the second load
1301                          * Note: if reg1 = reg2 is equal then second load is removed
1302                          *
1303                          * OP_LOAD_MEMBASE offset(basereg), reg1
1304                          * OP_LOAD_MEMBASE offset(basereg), reg2
1305                          * -->
1306                          * OP_LOAD_MEMBASE offset(basereg), reg1
1307                          * OP_MOVE reg1, reg2
1308                          */
1309                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1310                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1311                               ins->inst_basereg != last_ins->dreg &&
1312                               ins->inst_basereg == last_ins->inst_basereg &&
1313                               ins->inst_offset == last_ins->inst_offset) {
1314
1315                                 if (ins->dreg == last_ins->dreg) {
1316                                         last_ins->next = ins->next;                             
1317                                         ins = ins->next;                                
1318                                         continue;
1319                                 } else {
1320                                         ins->opcode = OP_MOVE;
1321                                         ins->sreg1 = last_ins->dreg;
1322                                 }
1323
1324                                 //g_assert_not_reached ();
1325
1326 #if 0
1327                         /* 
1328                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1329                          * OP_LOAD_MEMBASE offset(basereg), reg
1330                          * -->
1331                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1332                          * OP_ICONST reg, imm
1333                          */
1334                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1335                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1336                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1337                                    ins->inst_offset == last_ins->inst_offset) {
1338                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1339                                 ins->opcode = OP_ICONST;
1340                                 ins->inst_c0 = last_ins->inst_imm;
1341                                 g_assert_not_reached (); // check this rule
1342 #endif
1343                         }
1344                         break;
1345                 case OP_LOADU1_MEMBASE:
1346                 case OP_LOADI1_MEMBASE:
1347                         /* 
1348                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1349                          * OP_LOAD_MEMBASE offset(basereg), reg2
1350                          * -->
1351                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1352                          * CONV_I2/U2 reg1, reg2
1353                          */
1354                         if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
1355                                 (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1356                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1357                                         ins->inst_offset == last_ins->inst_offset) {
1358                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1359                                 ins->sreg1 = last_ins->sreg1;
1360                         }
1361                         break;
1362                 case OP_LOADU2_MEMBASE:
1363                 case OP_LOADI2_MEMBASE:
1364                         /* 
1365                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1366                          * OP_LOAD_MEMBASE offset(basereg), reg2
1367                          * -->
1368                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1369                          * CONV_I2/U2 reg1, reg2
1370                          */
1371                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1372                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1373                                         ins->inst_offset == last_ins->inst_offset) {
1374                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1375                                 ins->sreg1 = last_ins->sreg1;
1376                         }
1377                         break;
1378                 case CEE_CONV_I4:
1379                 case CEE_CONV_U4:
1380                 case OP_MOVE:
1381                         /*
1382                          * Removes:
1383                          *
1384                          * OP_MOVE reg, reg 
1385                          */
1386                         if (ins->dreg == ins->sreg1) {
1387                                 if (last_ins)
1388                                         last_ins->next = ins->next;                             
1389                                 ins = ins->next;
1390                                 continue;
1391                         }
1392                         /* 
1393                          * Removes:
1394                          *
1395                          * OP_MOVE sreg, dreg 
1396                          * OP_MOVE dreg, sreg
1397                          */
1398                         if (last_ins && last_ins->opcode == OP_MOVE &&
1399                             ins->sreg1 == last_ins->dreg &&
1400                             ins->dreg == last_ins->sreg1) {
1401                                 last_ins->next = ins->next;                             
1402                                 ins = ins->next;                                
1403                                 continue;
1404                         }
1405                         break;
1406                         
1407                 case OP_X86_PUSH_MEMBASE:
1408                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1409                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1410                             ins->inst_basereg == last_ins->inst_destbasereg &&
1411                             ins->inst_offset == last_ins->inst_offset) {
1412                                     ins->opcode = OP_X86_PUSH;
1413                                     ins->sreg1 = last_ins->sreg1;
1414                         }
1415                         break;
1416                 }
1417                 last_ins = ins;
1418                 ins = ins->next;
1419         }
1420         bb->last_ins = last_ins;
1421 }
1422
1423 static const int 
1424 branch_cc_table [] = {
1425         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1426         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1427         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1428 };
1429
1430 static const char*const * ins_spec = pentium_desc;
1431
1432 /*#include "cprop.c"*/
1433 void
1434 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1435 {
1436         mono_local_regalloc (cfg, bb);
1437 }
1438
1439 static unsigned char*
1440 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1441 {
1442         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1443         x86_fnstcw_membase(code, X86_ESP, 0);
1444         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1445         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1446         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1447         x86_fldcw_membase (code, X86_ESP, 2);
1448         if (size == 8) {
1449                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1450                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1451                 x86_pop_reg (code, dreg);
1452                 /* FIXME: need the high register 
1453                  * x86_pop_reg (code, dreg_high);
1454                  */
1455         } else {
1456                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1457                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1458                 x86_pop_reg (code, dreg);
1459         }
1460         x86_fldcw_membase (code, X86_ESP, 0);
1461         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1462
1463         if (size == 1)
1464                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1465         else if (size == 2)
1466                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1467         return code;
1468 }
1469
1470 static unsigned char*
1471 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1472 {
1473         int sreg = tree->sreg1;
1474         int need_touch = FALSE;
1475
1476 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1477         need_touch = TRUE;
1478 #endif
1479
1480         if (need_touch) {
1481                 guint8* br[5];
1482
1483                 /*
1484                  * Under Windows:
1485                  * If requested stack size is larger than one page,
1486                  * perform stack-touch operation
1487                  */
1488                 /*
1489                  * Generate stack probe code.
1490                  * Under Windows, it is necessary to allocate one page at a time,
1491                  * "touching" stack after each successful sub-allocation. This is
1492                  * because of the way stack growth is implemented - there is a
1493                  * guard page before the lowest stack page that is currently commited.
1494                  * Stack normally grows sequentially so OS traps access to the
1495                  * guard page and commits more pages when needed.
1496                  */
1497                 x86_test_reg_imm (code, sreg, ~0xFFF);
1498                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1499
1500                 br[2] = code; /* loop */
1501                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1502                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1503
1504                 /* 
1505                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
1506                  * that follows only initializes the last part of the area.
1507                  */
1508                 /* Same as the init code below with size==0x1000 */
1509                 if (tree->flags & MONO_INST_INIT) {
1510                         x86_push_reg (code, X86_EAX);
1511                         x86_push_reg (code, X86_ECX);
1512                         x86_push_reg (code, X86_EDI);
1513                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
1514                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
1515                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
1516                         x86_cld (code);
1517                         x86_prefix (code, X86_REP_PREFIX);
1518                         x86_stosl (code);
1519                         x86_pop_reg (code, X86_EDI);
1520                         x86_pop_reg (code, X86_ECX);
1521                         x86_pop_reg (code, X86_EAX);
1522                 }
1523
1524                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1525                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1526                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1527                 x86_patch (br[3], br[2]);
1528                 x86_test_reg_reg (code, sreg, sreg);
1529                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1530                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1531
1532                 br[1] = code; x86_jump8 (code, 0);
1533
1534                 x86_patch (br[0], code);
1535                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1536                 x86_patch (br[1], code);
1537                 x86_patch (br[4], code);
1538         }
1539         else
1540                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1541
1542         if (tree->flags & MONO_INST_INIT) {
1543                 int offset = 0;
1544                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1545                         x86_push_reg (code, X86_EAX);
1546                         offset += 4;
1547                 }
1548                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1549                         x86_push_reg (code, X86_ECX);
1550                         offset += 4;
1551                 }
1552                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1553                         x86_push_reg (code, X86_EDI);
1554                         offset += 4;
1555                 }
1556                 
1557                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1558                 if (sreg != X86_ECX)
1559                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1560                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1561                                 
1562                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1563                 x86_cld (code);
1564                 x86_prefix (code, X86_REP_PREFIX);
1565                 x86_stosl (code);
1566                 
1567                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1568                         x86_pop_reg (code, X86_EDI);
1569                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1570                         x86_pop_reg (code, X86_ECX);
1571                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1572                         x86_pop_reg (code, X86_EAX);
1573         }
1574         return code;
1575 }
1576
1577
1578 static guint8*
1579 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1580 {
1581         CallInfo *cinfo;
1582         int quad;
1583
1584         /* Move return value to the target register */
1585         switch (ins->opcode) {
1586         case CEE_CALL:
1587         case OP_CALL_REG:
1588         case OP_CALL_MEMBASE:
1589                 if (ins->dreg != X86_EAX)
1590                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1591                 break;
1592         case OP_VCALL:
1593         case OP_VCALL_REG:
1594         case OP_VCALL_MEMBASE:
1595                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
1596                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1597                         /* Pop the destination address from the stack */
1598                         x86_pop_reg (code, X86_ECX);
1599                         
1600                         for (quad = 0; quad < 2; quad ++) {
1601                                 switch (cinfo->ret.pair_storage [quad]) {
1602                                 case ArgInIReg:
1603                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1604                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1605                                         break;
1606                                 case ArgNone:
1607                                         break;
1608                                 default:
1609                                         g_assert_not_reached ();
1610                                 }
1611                         }
1612                 }
1613                 g_free (cinfo);
1614         default:
1615                 break;
1616         }
1617
1618         return code;
1619 }
1620
1621 static guint8*
1622 emit_tls_get (guint8* code, int dreg, int tls_offset)
1623 {
1624 #ifdef PLATFORM_WIN32
1625         /* 
1626          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
1627          * Journal and/or a disassembly of the TlsGet () function.
1628          */
1629         g_assert (tls_offset < 64);
1630         x86_prefix (code, X86_FS_PREFIX);
1631         x86_mov_reg_mem (code, dreg, 0x18, 4);
1632         /* Dunno what this does but TlsGetValue () contains it */
1633         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
1634         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
1635 #else
1636         x86_prefix (code, X86_GS_PREFIX);
1637         x86_mov_reg_mem (code, dreg, tls_offset, 4);                    
1638 #endif
1639         return code;
1640 }
1641
1642 #define REAL_PRINT_REG(text,reg) \
1643 mono_assert (reg >= 0); \
1644 x86_push_reg (code, X86_EAX); \
1645 x86_push_reg (code, X86_EDX); \
1646 x86_push_reg (code, X86_ECX); \
1647 x86_push_reg (code, reg); \
1648 x86_push_imm (code, reg); \
1649 x86_push_imm (code, text " %d %p\n"); \
1650 x86_mov_reg_imm (code, X86_EAX, printf); \
1651 x86_call_reg (code, X86_EAX); \
1652 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
1653 x86_pop_reg (code, X86_ECX); \
1654 x86_pop_reg (code, X86_EDX); \
1655 x86_pop_reg (code, X86_EAX);
1656
1657 /* benchmark and set based on cpu */
1658 #define LOOP_ALIGNMENT 8
1659 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
1660
1661 void
1662 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1663 {
1664         MonoInst *ins;
1665         MonoCallInst *call;
1666         guint offset;
1667         guint8 *code = cfg->native_code + cfg->code_len;
1668         MonoInst *last_ins = NULL;
1669         guint last_offset = 0;
1670         int max_len, cpos;
1671
1672         if (cfg->opt & MONO_OPT_PEEPHOLE)
1673                 peephole_pass (cfg, bb);
1674
1675         if (cfg->opt & MONO_OPT_LOOP) {
1676                 int pad, align = LOOP_ALIGNMENT;
1677                 /* set alignment depending on cpu */
1678                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
1679                         pad = align - pad;
1680                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
1681                         x86_padding (code, pad);
1682                         cfg->code_len += pad;
1683                         bb->native_offset = cfg->code_len;
1684                 }
1685         }
1686
1687         if (cfg->verbose_level > 2)
1688                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1689
1690         cpos = bb->max_offset;
1691
1692         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1693                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1694                 g_assert (!cfg->compile_aot);
1695                 cpos += 6;
1696
1697                 cov->data [bb->dfn].cil_code = bb->cil_code;
1698                 /* this is not thread save, but good enough */
1699                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1700         }
1701
1702         offset = code - cfg->native_code;
1703
1704         ins = bb->code;
1705         while (ins) {
1706                 offset = code - cfg->native_code;
1707
1708                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
1709
1710                 if (offset > (cfg->code_size - max_len - 16)) {
1711                         cfg->code_size *= 2;
1712                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1713                         code = cfg->native_code + offset;
1714                         mono_jit_stats.code_reallocs++;
1715                 }
1716
1717                 mono_debug_record_line_number (cfg, ins, offset);
1718
1719                 switch (ins->opcode) {
1720                 case OP_BIGMUL:
1721                         x86_mul_reg (code, ins->sreg2, TRUE);
1722                         break;
1723                 case OP_BIGMUL_UN:
1724                         x86_mul_reg (code, ins->sreg2, FALSE);
1725                         break;
1726                 case OP_X86_SETEQ_MEMBASE:
1727                 case OP_X86_SETNE_MEMBASE:
1728                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
1729                                          ins->inst_basereg, ins->inst_offset, TRUE);
1730                         break;
1731                 case OP_STOREI1_MEMBASE_IMM:
1732                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1733                         break;
1734                 case OP_STOREI2_MEMBASE_IMM:
1735                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1736                         break;
1737                 case OP_STORE_MEMBASE_IMM:
1738                 case OP_STOREI4_MEMBASE_IMM:
1739                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1740                         break;
1741                 case OP_STOREI1_MEMBASE_REG:
1742                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1743                         break;
1744                 case OP_STOREI2_MEMBASE_REG:
1745                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1746                         break;
1747                 case OP_STORE_MEMBASE_REG:
1748                 case OP_STOREI4_MEMBASE_REG:
1749                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1750                         break;
1751                 case CEE_LDIND_I:
1752                 case CEE_LDIND_I4:
1753                 case CEE_LDIND_U4:
1754                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1755                         break;
1756                 case OP_LOADU4_MEM:
1757                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1758                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1759                         break;
1760                 case OP_LOAD_MEMBASE:
1761                 case OP_LOADI4_MEMBASE:
1762                 case OP_LOADU4_MEMBASE:
1763                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1764                         break;
1765                 case OP_LOADU1_MEMBASE:
1766                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1767                         break;
1768                 case OP_LOADI1_MEMBASE:
1769                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1770                         break;
1771                 case OP_LOADU2_MEMBASE:
1772                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1773                         break;
1774                 case OP_LOADI2_MEMBASE:
1775                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1776                         break;
1777                 case CEE_CONV_I1:
1778                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1779                         break;
1780                 case CEE_CONV_I2:
1781                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1782                         break;
1783                 case CEE_CONV_U1:
1784                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1785                         break;
1786                 case CEE_CONV_U2:
1787                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1788                         break;
1789                 case OP_COMPARE:
1790                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1791                         break;
1792                 case OP_COMPARE_IMM:
1793                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1794                         break;
1795                 case OP_X86_COMPARE_MEMBASE_REG:
1796                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1797                         break;
1798                 case OP_X86_COMPARE_MEMBASE_IMM:
1799                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1800                         break;
1801                 case OP_X86_COMPARE_MEMBASE8_IMM:
1802                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1803                         break;
1804                 case OP_X86_COMPARE_REG_MEMBASE:
1805                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
1806                         break;
1807                 case OP_X86_COMPARE_MEM_IMM:
1808                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
1809                         break;
1810                 case OP_X86_TEST_NULL:
1811                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
1812                         break;
1813                 case OP_X86_ADD_MEMBASE_IMM:
1814                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1815                         break;
1816                 case OP_X86_ADD_MEMBASE:
1817                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
1818                         break;
1819                 case OP_X86_SUB_MEMBASE_IMM:
1820                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1821                         break;
1822                 case OP_X86_SUB_MEMBASE:
1823                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
1824                         break;
1825                 case OP_X86_INC_MEMBASE:
1826                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
1827                         break;
1828                 case OP_X86_INC_REG:
1829                         x86_inc_reg (code, ins->dreg);
1830                         break;
1831                 case OP_X86_DEC_MEMBASE:
1832                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
1833                         break;
1834                 case OP_X86_DEC_REG:
1835                         x86_dec_reg (code, ins->dreg);
1836                         break;
1837                 case OP_X86_MUL_MEMBASE:
1838                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
1839                         break;
1840                 case CEE_BREAK:
1841                         x86_breakpoint (code);
1842                         break;
1843                 case OP_ADDCC:
1844                 case CEE_ADD:
1845                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
1846                         break;
1847                 case OP_ADC:
1848                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
1849                         break;
1850                 case OP_ADDCC_IMM:
1851                 case OP_ADD_IMM:
1852                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
1853                         break;
1854                 case OP_ADC_IMM:
1855                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
1856                         break;
1857                 case OP_SUBCC:
1858                 case CEE_SUB:
1859                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
1860                         break;
1861                 case OP_SBB:
1862                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
1863                         break;
1864                 case OP_SUBCC_IMM:
1865                 case OP_SUB_IMM:
1866                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
1867                         break;
1868                 case OP_SBB_IMM:
1869                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
1870                         break;
1871                 case CEE_AND:
1872                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
1873                         break;
1874                 case OP_AND_IMM:
1875                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
1876                         break;
1877                 case CEE_DIV:
1878                         x86_cdq (code);
1879                         x86_div_reg (code, ins->sreg2, TRUE);
1880                         break;
1881                 case CEE_DIV_UN:
1882                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1883                         x86_div_reg (code, ins->sreg2, FALSE);
1884                         break;
1885                 case OP_DIV_IMM:
1886                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1887                         x86_cdq (code);
1888                         x86_div_reg (code, ins->sreg2, TRUE);
1889                         break;
1890                 case CEE_REM:
1891                         x86_cdq (code);
1892                         x86_div_reg (code, ins->sreg2, TRUE);
1893                         break;
1894                 case CEE_REM_UN:
1895                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1896                         x86_div_reg (code, ins->sreg2, FALSE);
1897                         break;
1898                 case OP_REM_IMM:
1899                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1900                         x86_cdq (code);
1901                         x86_div_reg (code, ins->sreg2, TRUE);
1902                         break;
1903                 case CEE_OR:
1904                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
1905                         break;
1906                 case OP_OR_IMM:
1907                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
1908                         break;
1909                 case CEE_XOR:
1910                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
1911                         break;
1912                 case OP_XOR_IMM:
1913                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
1914                         break;
1915                 case CEE_SHL:
1916                         g_assert (ins->sreg2 == X86_ECX);
1917                         x86_shift_reg (code, X86_SHL, ins->dreg);
1918                         break;
1919                 case CEE_SHR:
1920                         g_assert (ins->sreg2 == X86_ECX);
1921                         x86_shift_reg (code, X86_SAR, ins->dreg);
1922                         break;
1923                 case OP_SHR_IMM:
1924                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
1925                         break;
1926                 case OP_SHR_UN_IMM:
1927                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
1928                         break;
1929                 case CEE_SHR_UN:
1930                         g_assert (ins->sreg2 == X86_ECX);
1931                         x86_shift_reg (code, X86_SHR, ins->dreg);
1932                         break;
1933                 case OP_SHL_IMM:
1934                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
1935                         break;
1936                 case OP_LSHL: {
1937                         guint8 *jump_to_end;
1938
1939                         /* handle shifts below 32 bits */
1940                         x86_shld_reg (code, ins->unused, ins->sreg1);
1941                         x86_shift_reg (code, X86_SHL, ins->sreg1);
1942
1943                         x86_test_reg_imm (code, X86_ECX, 32);
1944                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
1945
1946                         /* handle shift over 32 bit */
1947                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
1948                         x86_clear_reg (code, ins->sreg1);
1949                         
1950                         x86_patch (jump_to_end, code);
1951                         }
1952                         break;
1953                 case OP_LSHR: {
1954                         guint8 *jump_to_end;
1955
1956                         /* handle shifts below 32 bits */
1957                         x86_shrd_reg (code, ins->sreg1, ins->unused);
1958                         x86_shift_reg (code, X86_SAR, ins->unused);
1959
1960                         x86_test_reg_imm (code, X86_ECX, 32);
1961                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
1962
1963                         /* handle shifts over 31 bits */
1964                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
1965                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
1966                         
1967                         x86_patch (jump_to_end, code);
1968                         }
1969                         break;
1970                 case OP_LSHR_UN: {
1971                         guint8 *jump_to_end;
1972
1973                         /* handle shifts below 32 bits */
1974                         x86_shrd_reg (code, ins->sreg1, ins->unused);
1975                         x86_shift_reg (code, X86_SHR, ins->unused);
1976
1977                         x86_test_reg_imm (code, X86_ECX, 32);
1978                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
1979
1980                         /* handle shifts over 31 bits */
1981                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
1982                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
1983                         
1984                         x86_patch (jump_to_end, code);
1985                         }
1986                         break;
1987                 case OP_LSHL_IMM:
1988                         if (ins->inst_imm >= 32) {
1989                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
1990                                 x86_clear_reg (code, ins->sreg1);
1991                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
1992                         } else {
1993                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
1994                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
1995                         }
1996                         break;
1997                 case OP_LSHR_IMM:
1998                         if (ins->inst_imm >= 32) {
1999                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
2000                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
2001                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2002                         } else {
2003                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2004                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
2005                         }
2006                         break;
2007                 case OP_LSHR_UN_IMM:
2008                         if (ins->inst_imm >= 32) {
2009                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2010                                 x86_clear_reg (code, ins->unused);
2011                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2012                         } else {
2013                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2014                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
2015                         }
2016                         break;
2017                 case CEE_NOT:
2018                         x86_not_reg (code, ins->sreg1);
2019                         break;
2020                 case CEE_NEG:
2021                         x86_neg_reg (code, ins->sreg1);
2022                         break;
2023                 case OP_SEXT_I1:
2024                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2025                         break;
2026                 case OP_SEXT_I2:
2027                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2028                         break;
2029                 case CEE_MUL:
2030                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2031                         break;
2032                 case OP_MUL_IMM:
2033                         switch (ins->inst_imm) {
2034                         case 2:
2035                                 /* MOV r1, r2 */
2036                                 /* ADD r1, r1 */
2037                                 if (ins->dreg != ins->sreg1)
2038                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2039                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2040                                 break;
2041                         case 3:
2042                                 /* LEA r1, [r2 + r2*2] */
2043                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2044                                 break;
2045                         case 5:
2046                                 /* LEA r1, [r2 + r2*4] */
2047                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2048                                 break;
2049                         case 6:
2050                                 /* LEA r1, [r2 + r2*2] */
2051                                 /* ADD r1, r1          */
2052                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2053                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2054                                 break;
2055                         case 9:
2056                                 /* LEA r1, [r2 + r2*8] */
2057                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2058                                 break;
2059                         case 10:
2060                                 /* LEA r1, [r2 + r2*4] */
2061                                 /* ADD r1, r1          */
2062                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2063                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2064                                 break;
2065                         case 12:
2066                                 /* LEA r1, [r2 + r2*2] */
2067                                 /* SHL r1, 2           */
2068                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2069                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2070                                 break;
2071                         case 25:
2072                                 /* LEA r1, [r2 + r2*4] */
2073                                 /* LEA r1, [r1 + r1*4] */
2074                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2075                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2076                                 break;
2077                         case 100:
2078                                 /* LEA r1, [r2 + r2*4] */
2079                                 /* SHL r1, 2           */
2080                                 /* LEA r1, [r1 + r1*4] */
2081                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2082                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2083                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2084                                 break;
2085                         default:
2086                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2087                                 break;
2088                         }
2089                         break;
2090                 case CEE_MUL_OVF:
2091                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2092                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2093                         break;
2094                 case CEE_MUL_OVF_UN: {
2095                         /* the mul operation and the exception check should most likely be split */
2096                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2097                         /*g_assert (ins->sreg2 == X86_EAX);
2098                         g_assert (ins->dreg == X86_EAX);*/
2099                         if (ins->sreg2 == X86_EAX) {
2100                                 non_eax_reg = ins->sreg1;
2101                         } else if (ins->sreg1 == X86_EAX) {
2102                                 non_eax_reg = ins->sreg2;
2103                         } else {
2104                                 /* no need to save since we're going to store to it anyway */
2105                                 if (ins->dreg != X86_EAX) {
2106                                         saved_eax = TRUE;
2107                                         x86_push_reg (code, X86_EAX);
2108                                 }
2109                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2110                                 non_eax_reg = ins->sreg2;
2111                         }
2112                         if (ins->dreg == X86_EDX) {
2113                                 if (!saved_eax) {
2114                                         saved_eax = TRUE;
2115                                         x86_push_reg (code, X86_EAX);
2116                                 }
2117                         } else if (ins->dreg != X86_EAX) {
2118                                 saved_edx = TRUE;
2119                                 x86_push_reg (code, X86_EDX);
2120                         }
2121                         x86_mul_reg (code, non_eax_reg, FALSE);
2122                         /* save before the check since pop and mov don't change the flags */
2123                         if (ins->dreg != X86_EAX)
2124                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2125                         if (saved_edx)
2126                                 x86_pop_reg (code, X86_EDX);
2127                         if (saved_eax)
2128                                 x86_pop_reg (code, X86_EAX);
2129                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2130                         break;
2131                 }
2132                 case OP_ICONST:
2133                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2134                         break;
2135                 case OP_AOTCONST:
2136                         g_assert_not_reached ();
2137                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2138                         x86_mov_reg_imm (code, ins->dreg, 0);
2139                         break;
2140                 case OP_LOAD_GOTADDR:
2141                         x86_call_imm (code, 0);
2142                         /* 
2143                          * The patch needs to point to the pop, since the GOT offset needs 
2144                          * to be added to that address.
2145                          */
2146                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2147                         x86_pop_reg (code, ins->dreg);
2148                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2149                         break;
2150                 case OP_GOT_ENTRY:
2151                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2152                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2153                         break;
2154                 case OP_X86_PUSH_GOT_ENTRY:
2155                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2156                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2157                         break;
2158                 case CEE_CONV_I4:
2159                 case OP_MOVE:
2160                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2161                         break;
2162                 case CEE_CONV_U4:
2163                         g_assert_not_reached ();
2164                 case CEE_JMP: {
2165                         /*
2166                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2167                          * Keep in sync with the code in emit_epilog.
2168                          */
2169                         int pos = 0;
2170
2171                         /* FIXME: no tracing support... */
2172                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2173                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2174                         /* reset offset to make max_len work */
2175                         offset = code - cfg->native_code;
2176
2177                         g_assert (!cfg->method->save_lmf);
2178
2179                         if (cfg->used_int_regs & (1 << X86_EBX))
2180                                 pos -= 4;
2181                         if (cfg->used_int_regs & (1 << X86_EDI))
2182                                 pos -= 4;
2183                         if (cfg->used_int_regs & (1 << X86_ESI))
2184                                 pos -= 4;
2185                         if (pos)
2186                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2187         
2188                         if (cfg->used_int_regs & (1 << X86_ESI))
2189                                 x86_pop_reg (code, X86_ESI);
2190                         if (cfg->used_int_regs & (1 << X86_EDI))
2191                                 x86_pop_reg (code, X86_EDI);
2192                         if (cfg->used_int_regs & (1 << X86_EBX))
2193                                 x86_pop_reg (code, X86_EBX);
2194         
2195                         /* restore ESP/EBP */
2196                         x86_leave (code);
2197                         offset = code - cfg->native_code;
2198                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2199                         x86_jump32 (code, 0);
2200                         break;
2201                 }
2202                 case OP_CHECK_THIS:
2203                         /* ensure ins->sreg1 is not NULL
2204                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2205                          * cmp DWORD PTR [eax], 0
2206                          */
2207                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2208                         break;
2209                 case OP_ARGLIST: {
2210                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2211                         x86_push_reg (code, hreg);
2212                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2213                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2214                         x86_pop_reg (code, hreg);
2215                         break;
2216                 }
2217                 case OP_FCALL:
2218                 case OP_LCALL:
2219                 case OP_VCALL:
2220                 case OP_VOIDCALL:
2221                 case CEE_CALL:
2222                         call = (MonoCallInst*)ins;
2223                         if (ins->flags & MONO_INST_HAS_METHOD)
2224                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2225                         else
2226                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2227                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2228                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2229                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2230                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
2231                                  * smart enough to do that optimization yet
2232                                  *
2233                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2234                                  * mcs botstrap slow down. However, doing 1 pop for 4 bytes creates a small,
2235                                  * (most likely from locality benefits). People with other processors should
2236                                  * check on theirs to see what happens.
2237                                  */
2238                                 if (call->stack_usage == 4) {
2239                                         /* we want to use registers that won't get used soon, so use
2240                                          * ecx, as eax will get allocated first. edx is used by long calls,
2241                                          * so we can't use that.
2242                                          */
2243                                         
2244                                         x86_pop_reg (code, X86_ECX);
2245                                 } else {
2246                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2247                                 }
2248                         }
2249                         code = emit_move_return_value (cfg, ins, code);
2250                         break;
2251                 case OP_FCALL_REG:
2252                 case OP_LCALL_REG:
2253                 case OP_VCALL_REG:
2254                 case OP_VOIDCALL_REG:
2255                 case OP_CALL_REG:
2256                         call = (MonoCallInst*)ins;
2257                         x86_call_reg (code, ins->sreg1);
2258                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2259                                 if (call->stack_usage == 4)
2260                                         x86_pop_reg (code, X86_ECX);
2261                                 else
2262                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2263                         }
2264                         code = emit_move_return_value (cfg, ins, code);
2265                         break;
2266                 case OP_FCALL_MEMBASE:
2267                 case OP_LCALL_MEMBASE:
2268                 case OP_VCALL_MEMBASE:
2269                 case OP_VOIDCALL_MEMBASE:
2270                 case OP_CALL_MEMBASE:
2271                         call = (MonoCallInst*)ins;
2272                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2273                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2274                                 if (call->stack_usage == 4)
2275                                         x86_pop_reg (code, X86_ECX);
2276                                 else
2277                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2278                         }
2279                         code = emit_move_return_value (cfg, ins, code);
2280                         break;
2281                 case OP_OUTARG:
2282                 case OP_X86_PUSH:
2283                         x86_push_reg (code, ins->sreg1);
2284                         break;
2285                 case OP_X86_PUSH_IMM:
2286                         x86_push_imm (code, ins->inst_imm);
2287                         break;
2288                 case OP_X86_PUSH_MEMBASE:
2289                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2290                         break;
2291                 case OP_X86_PUSH_OBJ: 
2292                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2293                         x86_push_reg (code, X86_EDI);
2294                         x86_push_reg (code, X86_ESI);
2295                         x86_push_reg (code, X86_ECX);
2296                         if (ins->inst_offset)
2297                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2298                         else
2299                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2300                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2301                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2302                         x86_cld (code);
2303                         x86_prefix (code, X86_REP_PREFIX);
2304                         x86_movsd (code);
2305                         x86_pop_reg (code, X86_ECX);
2306                         x86_pop_reg (code, X86_ESI);
2307                         x86_pop_reg (code, X86_EDI);
2308                         break;
2309                 case OP_X86_LEA:
2310                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2311                         break;
2312                 case OP_X86_LEA_MEMBASE:
2313                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2314                         break;
2315                 case OP_X86_XCHG:
2316                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2317                         break;
2318                 case OP_LOCALLOC:
2319                         /* keep alignment */
2320                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2321                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2322                         code = mono_emit_stack_alloc (code, ins);
2323                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2324                         break;
2325                 case CEE_RET:
2326                         x86_ret (code);
2327                         break;
2328                 case CEE_THROW: {
2329                         x86_push_reg (code, ins->sreg1);
2330                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2331                                                           (gpointer)"mono_arch_throw_exception");
2332                         break;
2333                 }
2334                 case OP_RETHROW: {
2335                         x86_push_reg (code, ins->sreg1);
2336                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2337                                                           (gpointer)"mono_arch_rethrow_exception");
2338                         break;
2339                 }
2340                 case OP_CALL_HANDLER: 
2341                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2342                         x86_call_imm (code, 0);
2343                         break;
2344                 case OP_LABEL:
2345                         ins->inst_c0 = code - cfg->native_code;
2346                         break;
2347                 case CEE_BR:
2348                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2349                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2350                         //break;
2351                         if (ins->flags & MONO_INST_BRLABEL) {
2352                                 if (ins->inst_i0->inst_c0) {
2353                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2354                                 } else {
2355                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2356                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2357                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2358                                                 x86_jump8 (code, 0);
2359                                         else 
2360                                                 x86_jump32 (code, 0);
2361                                 }
2362                         } else {
2363                                 if (ins->inst_target_bb->native_offset) {
2364                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2365                                 } else {
2366                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2367                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2368                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2369                                                 x86_jump8 (code, 0);
2370                                         else 
2371                                                 x86_jump32 (code, 0);
2372                                 } 
2373                         }
2374                         break;
2375                 case OP_BR_REG:
2376                         x86_jump_reg (code, ins->sreg1);
2377                         break;
2378                 case OP_CEQ:
2379                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2380                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2381                         break;
2382                 case OP_CLT:
2383                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2384                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2385                         break;
2386                 case OP_CLT_UN:
2387                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2388                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2389                         break;
2390                 case OP_CGT:
2391                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2392                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2393                         break;
2394                 case OP_CGT_UN:
2395                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2396                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2397                         break;
2398                 case OP_CNE:
2399                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2400                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2401                         break;
2402                 case OP_COND_EXC_EQ:
2403                 case OP_COND_EXC_NE_UN:
2404                 case OP_COND_EXC_LT:
2405                 case OP_COND_EXC_LT_UN:
2406                 case OP_COND_EXC_GT:
2407                 case OP_COND_EXC_GT_UN:
2408                 case OP_COND_EXC_GE:
2409                 case OP_COND_EXC_GE_UN:
2410                 case OP_COND_EXC_LE:
2411                 case OP_COND_EXC_LE_UN:
2412                 case OP_COND_EXC_OV:
2413                 case OP_COND_EXC_NO:
2414                 case OP_COND_EXC_C:
2415                 case OP_COND_EXC_NC:
2416                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2417                         break;
2418                 case CEE_BEQ:
2419                 case CEE_BNE_UN:
2420                 case CEE_BLT:
2421                 case CEE_BLT_UN:
2422                 case CEE_BGT:
2423                 case CEE_BGT_UN:
2424                 case CEE_BGE:
2425                 case CEE_BGE_UN:
2426                 case CEE_BLE:
2427                 case CEE_BLE_UN:
2428                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2429                         break;
2430
2431                 /* floating point opcodes */
2432                 case OP_R8CONST: {
2433                         double d = *(double *)ins->inst_p0;
2434
2435                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2436                                 x86_fldz (code);
2437                         } else if (d == 1.0) {
2438                                 x86_fld1 (code);
2439                         } else {
2440                                 if (cfg->compile_aot) {
2441                                         guint32 *val = (guint32*)&d;
2442                                         x86_push_imm (code, val [1]);
2443                                         x86_push_imm (code, val [0]);
2444                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2445                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2446                                 }
2447                                 else {
2448                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2449                                         x86_fld (code, NULL, TRUE);
2450                                 }
2451                         }
2452                         break;
2453                 }
2454                 case OP_R4CONST: {
2455                         float f = *(float *)ins->inst_p0;
2456
2457                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2458                                 x86_fldz (code);
2459                         } else if (f == 1.0) {
2460                                 x86_fld1 (code);
2461                         } else {
2462                                 if (cfg->compile_aot) {
2463                                         guint32 val = *(guint32*)&f;
2464                                         x86_push_imm (code, val);
2465                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2466                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2467                                 }
2468                                 else {
2469                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2470                                         x86_fld (code, NULL, FALSE);
2471                                 }
2472                         }
2473                         break;
2474                 }
2475                 case OP_STORER8_MEMBASE_REG:
2476                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2477                         break;
2478                 case OP_LOADR8_SPILL_MEMBASE:
2479                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2480                         x86_fxch (code, 1);
2481                         break;
2482                 case OP_LOADR8_MEMBASE:
2483                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2484                         break;
2485                 case OP_STORER4_MEMBASE_REG:
2486                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2487                         break;
2488                 case OP_LOADR4_MEMBASE:
2489                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2490                         break;
2491                 case CEE_CONV_R4: /* FIXME: change precision */
2492                 case CEE_CONV_R8:
2493                         x86_push_reg (code, ins->sreg1);
2494                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2495                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2496                         break;
2497                 case OP_X86_FP_LOAD_I8:
2498                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2499                         break;
2500                 case OP_X86_FP_LOAD_I4:
2501                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2502                         break;
2503                 case OP_FCONV_TO_I1:
2504                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2505                         break;
2506                 case OP_FCONV_TO_U1:
2507                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2508                         break;
2509                 case OP_FCONV_TO_I2:
2510                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2511                         break;
2512                 case OP_FCONV_TO_U2:
2513                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2514                         break;
2515                 case OP_FCONV_TO_I4:
2516                 case OP_FCONV_TO_I:
2517                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2518                         break;
2519                 case OP_FCONV_TO_I8:
2520                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2521                         x86_fnstcw_membase(code, X86_ESP, 0);
2522                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2523                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2524                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2525                         x86_fldcw_membase (code, X86_ESP, 2);
2526                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2527                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2528                         x86_pop_reg (code, ins->dreg);
2529                         x86_pop_reg (code, ins->unused);
2530                         x86_fldcw_membase (code, X86_ESP, 0);
2531                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2532                         break;
2533                 case OP_LCONV_TO_R_UN: { 
2534                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2535                         guint8 *br;
2536
2537                         /* load 64bit integer to FP stack */
2538                         x86_push_imm (code, 0);
2539                         x86_push_reg (code, ins->sreg2);
2540                         x86_push_reg (code, ins->sreg1);
2541                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2542                         /* store as 80bit FP value */
2543                         x86_fst80_membase (code, X86_ESP, 0);
2544                         
2545                         /* test if lreg is negative */
2546                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2547                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2548         
2549                         /* add correction constant mn */
2550                         x86_fld80_mem (code, mn);
2551                         x86_fld80_membase (code, X86_ESP, 0);
2552                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2553                         x86_fst80_membase (code, X86_ESP, 0);
2554
2555                         x86_patch (br, code);
2556
2557                         x86_fld80_membase (code, X86_ESP, 0);
2558                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2559
2560                         break;
2561                 }
2562                 case OP_LCONV_TO_OVF_I: {
2563                         guint8 *br [3], *label [1];
2564                         MonoInst *tins;
2565
2566                         /* 
2567                          * Valid ints: 0xffffffff:8000000 to 00000000:0x7f000000
2568                          */
2569                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2570
2571                         /* If the low word top bit is set, see if we are negative */
2572                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2573                         /* We are not negative (no top bit set, check for our top word to be zero */
2574                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2575                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2576                         label [0] = code;
2577
2578                         /* throw exception */
2579                         tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
2580                         if (tins) {
2581                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
2582                                 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
2583                                         x86_jump8 (code, 0);
2584                                 else
2585                                         x86_jump32 (code, 0);
2586                         } else {
2587                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2588                                 x86_jump32 (code, 0);
2589                         }
2590         
2591         
2592                         x86_patch (br [0], code);
2593                         /* our top bit is set, check that top word is 0xfffffff */
2594                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2595                 
2596                         x86_patch (br [1], code);
2597                         /* nope, emit exception */
2598                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2599                         x86_patch (br [2], label [0]);
2600
2601                         if (ins->dreg != ins->sreg1)
2602                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2603                         break;
2604                 }
2605                 case OP_FADD:
2606                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2607                         break;
2608                 case OP_FSUB:
2609                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2610                         break;          
2611                 case OP_FMUL:
2612                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2613                         break;          
2614                 case OP_FDIV:
2615                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2616                         break;          
2617                 case OP_FNEG:
2618                         x86_fchs (code);
2619                         break;          
2620                 case OP_SIN:
2621                         x86_fsin (code);
2622                         x86_fldz (code);
2623                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2624                         break;          
2625                 case OP_COS:
2626                         x86_fcos (code);
2627                         x86_fldz (code);
2628                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2629                         break;          
2630                 case OP_ABS:
2631                         x86_fabs (code);
2632                         break;          
2633                 case OP_TAN: {
2634                         /* 
2635                          * it really doesn't make sense to inline all this code,
2636                          * it's here just to show that things may not be as simple 
2637                          * as they appear.
2638                          */
2639                         guchar *check_pos, *end_tan, *pop_jump;
2640                         x86_push_reg (code, X86_EAX);
2641                         x86_fptan (code);
2642                         x86_fnstsw (code);
2643                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2644                         check_pos = code;
2645                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2646                         x86_fstp (code, 0); /* pop the 1.0 */
2647                         end_tan = code;
2648                         x86_jump8 (code, 0);
2649                         x86_fldpi (code);
2650                         x86_fp_op (code, X86_FADD, 0);
2651                         x86_fxch (code, 1);
2652                         x86_fprem1 (code);
2653                         x86_fstsw (code);
2654                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2655                         pop_jump = code;
2656                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2657                         x86_fstp (code, 1);
2658                         x86_fptan (code);
2659                         x86_patch (pop_jump, code);
2660                         x86_fstp (code, 0); /* pop the 1.0 */
2661                         x86_patch (check_pos, code);
2662                         x86_patch (end_tan, code);
2663                         x86_fldz (code);
2664                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2665                         x86_pop_reg (code, X86_EAX);
2666                         break;
2667                 }
2668                 case OP_ATAN:
2669                         x86_fld1 (code);
2670                         x86_fpatan (code);
2671                         x86_fldz (code);
2672                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2673                         break;          
2674                 case OP_SQRT:
2675                         x86_fsqrt (code);
2676                         break;          
2677                 case OP_X86_FPOP:
2678                         x86_fstp (code, 0);
2679                         break;          
2680                 case OP_FREM: {
2681                         guint8 *l1, *l2;
2682
2683                         x86_push_reg (code, X86_EAX);
2684                         /* we need to exchange ST(0) with ST(1) */
2685                         x86_fxch (code, 1);
2686
2687                         /* this requires a loop, because fprem somtimes 
2688                          * returns a partial remainder */
2689                         l1 = code;
2690                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2691                         /* x86_fprem1 (code); */
2692                         x86_fprem (code);
2693                         x86_fnstsw (code);
2694                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2695                         l2 = code + 2;
2696                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2697
2698                         /* pop result */
2699                         x86_fstp (code, 1);
2700
2701                         x86_pop_reg (code, X86_EAX);
2702                         break;
2703                 }
2704                 case OP_FCOMPARE:
2705                         if (cfg->opt & MONO_OPT_FCMOV) {
2706                                 x86_fcomip (code, 1);
2707                                 x86_fstp (code, 0);
2708                                 break;
2709                         }
2710                         /* this overwrites EAX */
2711                         EMIT_FPCOMPARE(code);
2712                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2713                         break;
2714                 case OP_FCEQ:
2715                         if (cfg->opt & MONO_OPT_FCMOV) {
2716                                 /* zeroing the register at the start results in 
2717                                  * shorter and faster code (we can also remove the widening op)
2718                                  */
2719                                 guchar *unordered_check;
2720                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2721                                 x86_fcomip (code, 1);
2722                                 x86_fstp (code, 0);
2723                                 unordered_check = code;
2724                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2725                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2726                                 x86_patch (unordered_check, code);
2727                                 break;
2728                         }
2729                         if (ins->dreg != X86_EAX) 
2730                                 x86_push_reg (code, X86_EAX);
2731
2732                         EMIT_FPCOMPARE(code);
2733                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2734                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2735                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2736                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2737
2738                         if (ins->dreg != X86_EAX) 
2739                                 x86_pop_reg (code, X86_EAX);
2740                         break;
2741                 case OP_FCLT:
2742                 case OP_FCLT_UN:
2743                         if (cfg->opt & MONO_OPT_FCMOV) {
2744                                 /* zeroing the register at the start results in 
2745                                  * shorter and faster code (we can also remove the widening op)
2746                                  */
2747                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2748                                 x86_fcomip (code, 1);
2749                                 x86_fstp (code, 0);
2750                                 if (ins->opcode == OP_FCLT_UN) {
2751                                         guchar *unordered_check = code;
2752                                         guchar *jump_to_end;
2753                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2754                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2755                                         jump_to_end = code;
2756                                         x86_jump8 (code, 0);
2757                                         x86_patch (unordered_check, code);
2758                                         x86_inc_reg (code, ins->dreg);
2759                                         x86_patch (jump_to_end, code);
2760                                 } else {
2761                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2762                                 }
2763                                 break;
2764                         }
2765                         if (ins->dreg != X86_EAX) 
2766                                 x86_push_reg (code, X86_EAX);
2767
2768                         EMIT_FPCOMPARE(code);
2769                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2770                         if (ins->opcode == OP_FCLT_UN) {
2771                                 guchar *is_not_zero_check, *end_jump;
2772                                 is_not_zero_check = code;
2773                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2774                                 end_jump = code;
2775                                 x86_jump8 (code, 0);
2776                                 x86_patch (is_not_zero_check, code);
2777                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2778
2779                                 x86_patch (end_jump, code);
2780                         }
2781                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2782                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2783
2784                         if (ins->dreg != X86_EAX) 
2785                                 x86_pop_reg (code, X86_EAX);
2786                         break;
2787                 case OP_FCGT:
2788                 case OP_FCGT_UN:
2789                         if (cfg->opt & MONO_OPT_FCMOV) {
2790                                 /* zeroing the register at the start results in 
2791                                  * shorter and faster code (we can also remove the widening op)
2792                                  */
2793                                 guchar *unordered_check;
2794                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2795                                 x86_fcomip (code, 1);
2796                                 x86_fstp (code, 0);
2797                                 if (ins->opcode == OP_FCGT) {
2798                                         unordered_check = code;
2799                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2800                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2801                                         x86_patch (unordered_check, code);
2802                                 } else {
2803                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2804                                 }
2805                                 break;
2806                         }
2807                         if (ins->dreg != X86_EAX) 
2808                                 x86_push_reg (code, X86_EAX);
2809
2810                         EMIT_FPCOMPARE(code);
2811                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2812                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2813                         if (ins->opcode == OP_FCGT_UN) {
2814                                 guchar *is_not_zero_check, *end_jump;
2815                                 is_not_zero_check = code;
2816                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2817                                 end_jump = code;
2818                                 x86_jump8 (code, 0);
2819                                 x86_patch (is_not_zero_check, code);
2820                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2821         
2822                                 x86_patch (end_jump, code);
2823                         }
2824                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2825                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2826
2827                         if (ins->dreg != X86_EAX) 
2828                                 x86_pop_reg (code, X86_EAX);
2829                         break;
2830                 case OP_FBEQ:
2831                         if (cfg->opt & MONO_OPT_FCMOV) {
2832                                 guchar *jump = code;
2833                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2834                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2835                                 x86_patch (jump, code);
2836                                 break;
2837                         }
2838                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2839                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2840                         break;
2841                 case OP_FBNE_UN:
2842                         /* Branch if C013 != 100 */
2843                         if (cfg->opt & MONO_OPT_FCMOV) {
2844                                 /* branch if !ZF or (PF|CF) */
2845                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2846                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2847                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2848                                 break;
2849                         }
2850                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2851                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2852                         break;
2853                 case OP_FBLT:
2854                         if (cfg->opt & MONO_OPT_FCMOV) {
2855                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2856                                 break;
2857                         }
2858                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2859                         break;
2860                 case OP_FBLT_UN:
2861                         if (cfg->opt & MONO_OPT_FCMOV) {
2862                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2863                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2864                                 break;
2865                         }
2866                         if (ins->opcode == OP_FBLT_UN) {
2867                                 guchar *is_not_zero_check, *end_jump;
2868                                 is_not_zero_check = code;
2869                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2870                                 end_jump = code;
2871                                 x86_jump8 (code, 0);
2872                                 x86_patch (is_not_zero_check, code);
2873                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2874
2875                                 x86_patch (end_jump, code);
2876                         }
2877                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2878                         break;
2879                 case OP_FBGT:
2880                 case OP_FBGT_UN:
2881                         if (cfg->opt & MONO_OPT_FCMOV) {
2882                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2883                                 break;
2884                         }
2885                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2886                         if (ins->opcode == OP_FBGT_UN) {
2887                                 guchar *is_not_zero_check, *end_jump;
2888                                 is_not_zero_check = code;
2889                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2890                                 end_jump = code;
2891                                 x86_jump8 (code, 0);
2892                                 x86_patch (is_not_zero_check, code);
2893                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2894
2895                                 x86_patch (end_jump, code);
2896                         }
2897                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2898                         break;
2899                 case OP_FBGE:
2900                         /* Branch if C013 == 100 or 001 */
2901                         if (cfg->opt & MONO_OPT_FCMOV) {
2902                                 guchar *br1;
2903
2904                                 /* skip branch if C1=1 */
2905                                 br1 = code;
2906                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2907                                 /* branch if (C0 | C3) = 1 */
2908                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
2909                                 x86_patch (br1, code);
2910                                 break;
2911                         }
2912                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2913                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2914                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2915                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2916                         break;
2917                 case OP_FBGE_UN:
2918                         /* Branch if C013 == 000 */
2919                         if (cfg->opt & MONO_OPT_FCMOV) {
2920                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
2921                                 break;
2922                         }
2923                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2924                         break;
2925                 case OP_FBLE:
2926                         /* Branch if C013=000 or 100 */
2927                         if (cfg->opt & MONO_OPT_FCMOV) {
2928                                 guchar *br1;
2929
2930                                 /* skip branch if C1=1 */
2931                                 br1 = code;
2932                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2933                                 /* branch if C0=0 */
2934                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
2935                                 x86_patch (br1, code);
2936                                 break;
2937                         }
2938                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
2939                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
2940                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2941                         break;
2942                 case OP_FBLE_UN:
2943                         /* Branch if C013 != 001 */
2944                         if (cfg->opt & MONO_OPT_FCMOV) {
2945                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2946                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
2947                                 break;
2948                         }
2949                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2950                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2951                         break;
2952                 case CEE_CKFINITE: {
2953                         x86_push_reg (code, X86_EAX);
2954                         x86_fxam (code);
2955                         x86_fnstsw (code);
2956                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
2957                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2958                         x86_pop_reg (code, X86_EAX);
2959                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
2960                         break;
2961                 }
2962                 case OP_TLS_GET: {
2963                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
2964                         break;
2965                 }
2966                 case OP_MEMORY_BARRIER: {
2967                         /* Not needed on x86 */
2968                         break;
2969                 }
2970                 case OP_ATOMIC_ADD_I4: {
2971                         int dreg = ins->dreg;
2972
2973                         if (dreg == ins->inst_basereg) {
2974                                 x86_push_reg (code, ins->sreg2);
2975                                 dreg = ins->sreg2;
2976                         } 
2977                         
2978                         if (dreg != ins->sreg2)
2979                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
2980
2981                         x86_prefix (code, X86_LOCK_PREFIX);
2982                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
2983
2984                         if (dreg != ins->dreg) {
2985                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
2986                                 x86_pop_reg (code, dreg);
2987                         }
2988
2989                         break;
2990                 }
2991                 case OP_ATOMIC_ADD_NEW_I4: {
2992                         int dreg = ins->dreg;
2993
2994                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
2995                         if (ins->sreg2 == dreg) {
2996                                 if (dreg == X86_EBX) {
2997                                         dreg = X86_EDI;
2998                                         if (ins->inst_basereg == X86_EDI)
2999                                                 dreg = X86_ESI;
3000                                 } else {
3001                                         dreg = X86_EBX;
3002                                         if (ins->inst_basereg == X86_EBX)
3003                                                 dreg = X86_EDI;
3004                                 }
3005                         } else if (ins->inst_basereg == dreg) {
3006                                 if (dreg == X86_EBX) {
3007                                         dreg = X86_EDI;
3008                                         if (ins->sreg2 == X86_EDI)
3009                                                 dreg = X86_ESI;
3010                                 } else {
3011                                         dreg = X86_EBX;
3012                                         if (ins->sreg2 == X86_EBX)
3013                                                 dreg = X86_EDI;
3014                                 }
3015                         }
3016
3017                         if (dreg != ins->dreg) {
3018                                 x86_push_reg (code, dreg);
3019                         }
3020
3021                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3022                         x86_prefix (code, X86_LOCK_PREFIX);
3023                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3024                         /* dreg contains the old value, add with sreg2 value */
3025                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3026                         
3027                         if (ins->dreg != dreg) {
3028                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3029                                 x86_pop_reg (code, dreg);
3030                         }
3031
3032                         break;
3033                 }
3034                 case OP_ATOMIC_EXCHANGE_I4: {
3035                         guchar *br[2];
3036                         int sreg2 = ins->sreg2;
3037                         int breg = ins->inst_basereg;
3038
3039                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3040                          * hack to overcome limits in x86 reg allocator 
3041                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3042                          */
3043                         if (ins->dreg != X86_EAX)
3044                                 x86_push_reg (code, X86_EAX);
3045                         
3046                         /* We need the EAX reg for the cmpxchg */
3047                         if (ins->sreg2 == X86_EAX) {
3048                                 x86_push_reg (code, X86_EDX);
3049                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3050                                 sreg2 = X86_EDX;
3051                         }
3052
3053                         if (breg == X86_EAX) {
3054                                 x86_push_reg (code, X86_ESI);
3055                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3056                                 breg = X86_ESI;
3057                         }
3058
3059                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3060
3061                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3062                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3063                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3064                         x86_patch (br [1], br [0]);
3065
3066                         if (breg != ins->inst_basereg)
3067                                 x86_pop_reg (code, X86_ESI);
3068
3069                         if (ins->dreg != X86_EAX) {
3070                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3071                                 x86_pop_reg (code, X86_EAX);
3072                         }
3073
3074                         if (ins->sreg2 != sreg2)
3075                                 x86_pop_reg (code, X86_EDX);
3076
3077                         break;
3078                 }
3079                 default:
3080                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3081                         g_assert_not_reached ();
3082                 }
3083
3084                 if ((code - cfg->native_code - offset) > max_len) {
3085                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3086                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3087                         g_assert_not_reached ();
3088                 }
3089                
3090                 cpos += max_len;
3091
3092                 last_ins = ins;
3093                 last_offset = offset;
3094                 
3095                 ins = ins->next;
3096         }
3097
3098         cfg->code_len = code - cfg->native_code;
3099 }
3100
3101 void
3102 mono_arch_register_lowlevel_calls (void)
3103 {
3104 }
3105
3106 void
3107 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3108 {
3109         MonoJumpInfo *patch_info;
3110         gboolean compile_aot = !run_cctors;
3111
3112         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3113                 unsigned char *ip = patch_info->ip.i + code;
3114                 const unsigned char *target;
3115
3116                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3117
3118                 if (compile_aot) {
3119                         switch (patch_info->type) {
3120                         case MONO_PATCH_INFO_BB:
3121                         case MONO_PATCH_INFO_LABEL:
3122                                 break;
3123                         default:
3124                                 /* No need to patch these */
3125                                 continue;
3126                         }
3127                 }
3128
3129                 switch (patch_info->type) {
3130                 case MONO_PATCH_INFO_IP:
3131                         *((gconstpointer *)(ip)) = target;
3132                         break;
3133                 case MONO_PATCH_INFO_CLASS_INIT: {
3134                         guint8 *code = ip;
3135                         /* Might already been changed to a nop */
3136                         x86_call_code (code, 0);
3137                         x86_patch (ip, target);
3138                         break;
3139                 }
3140                 case MONO_PATCH_INFO_ABS:
3141                 case MONO_PATCH_INFO_METHOD:
3142                 case MONO_PATCH_INFO_METHOD_JUMP:
3143                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3144                 case MONO_PATCH_INFO_BB:
3145                 case MONO_PATCH_INFO_LABEL:
3146                         x86_patch (ip, target);
3147                         break;
3148                 case MONO_PATCH_INFO_NONE:
3149                         break;
3150                 default: {
3151                         guint32 offset = mono_arch_get_patch_offset (ip);
3152                         *((gconstpointer *)(ip + offset)) = target;
3153                         break;
3154                 }
3155                 }
3156         }
3157 }
3158
3159 guint8 *
3160 mono_arch_emit_prolog (MonoCompile *cfg)
3161 {
3162         MonoMethod *method = cfg->method;
3163         MonoBasicBlock *bb;
3164         MonoMethodSignature *sig;
3165         MonoInst *inst;
3166         int alloc_size, pos, max_offset, i;
3167         guint8 *code;
3168
3169         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3170         code = cfg->native_code = g_malloc (cfg->code_size);
3171
3172         x86_push_reg (code, X86_EBP);
3173         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3174
3175         alloc_size = cfg->stack_offset;
3176         pos = 0;
3177
3178         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
3179                 /* Might need to attach the thread to the JIT */
3180                 if (lmf_tls_offset != -1) {
3181                         guint8 *buf;
3182
3183                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
3184                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
3185                         buf = code;
3186                         x86_branch8 (code, X86_CC_NE, 0, 0);
3187                         x86_push_imm (code, cfg->domain);
3188                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3189                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3190                         x86_patch (buf, code);
3191 #ifdef PLATFORM_WIN32
3192                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3193                         /* FIXME: Add a separate key for LMF to avoid this */
3194                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3195 #endif
3196                 }
3197                 else {
3198                         g_assert (!cfg->compile_aot);
3199                         x86_push_imm (code, cfg->domain);
3200                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
3201                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3202                 }
3203         }
3204
3205         if (method->save_lmf) {
3206                 pos += sizeof (MonoLMF);
3207
3208                 /* save the current IP */
3209                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3210                 x86_push_imm_template (code);
3211
3212                 /* save all caller saved regs */
3213                 x86_push_reg (code, X86_EBP);
3214                 x86_push_reg (code, X86_ESI);
3215                 x86_push_reg (code, X86_EDI);
3216                 x86_push_reg (code, X86_EBX);
3217
3218                 /* save method info */
3219                 x86_push_imm (code, method);
3220
3221                 /* get the address of lmf for the current thread */
3222                 /* 
3223                  * This is performance critical so we try to use some tricks to make
3224                  * it fast.
3225                  */
3226                 if (lmf_tls_offset != -1) {
3227                         /* Load lmf quicky using the GS register */
3228                         code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
3229 #ifdef PLATFORM_WIN32
3230                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
3231                         /* FIXME: Add a separate key for LMF to avoid this */
3232                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
3233 #endif
3234                 }
3235                 else {
3236                         if (cfg->compile_aot) {
3237                                 /* The GOT var does not exist yet */
3238                                 x86_call_imm (code, 0);
3239                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3240                                 x86_pop_reg (code, X86_EAX);
3241                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
3242                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3243                                 x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
3244                         }
3245                         else
3246                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3247                 }
3248
3249                 /* push lmf */
3250                 x86_push_reg (code, X86_EAX); 
3251                 /* push *lfm (previous_lmf) */
3252                 x86_push_membase (code, X86_EAX, 0);
3253                 /* *(lmf) = ESP */
3254                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3255         } else {
3256
3257                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3258                         x86_push_reg (code, X86_EBX);
3259                         pos += 4;
3260                 }
3261
3262                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3263                         x86_push_reg (code, X86_EDI);
3264                         pos += 4;
3265                 }
3266
3267                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3268                         x86_push_reg (code, X86_ESI);
3269                         pos += 4;
3270                 }
3271         }
3272
3273         alloc_size -= pos;
3274
3275         if (alloc_size) {
3276                 /* See mono_emit_stack_alloc */
3277 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3278                 guint32 remaining_size = alloc_size;
3279                 while (remaining_size >= 0x1000) {
3280                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3281                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3282                         remaining_size -= 0x1000;
3283                 }
3284                 if (remaining_size)
3285                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3286 #else
3287                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3288 #endif
3289         }
3290
3291         /* compute max_offset in order to use short forward jumps */
3292         max_offset = 0;
3293         if (cfg->opt & MONO_OPT_BRANCH) {
3294                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3295                         MonoInst *ins = bb->code;
3296                         bb->max_offset = max_offset;
3297
3298                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3299                                 max_offset += 6;
3300                         /* max alignment for loops */
3301                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3302                                 max_offset += LOOP_ALIGNMENT;
3303
3304                         while (ins) {
3305                                 if (ins->opcode == OP_LABEL)
3306                                         ins->inst_c1 = max_offset;
3307                                 
3308                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3309                                 ins = ins->next;
3310                         }
3311                 }
3312         }
3313
3314         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3315                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3316
3317         /* load arguments allocated to register from the stack */
3318         sig = mono_method_signature (method);
3319         pos = 0;
3320
3321         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3322                 inst = cfg->varinfo [pos];
3323                 if (inst->opcode == OP_REGVAR) {
3324                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3325                         if (cfg->verbose_level > 2)
3326                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3327                 }
3328                 pos++;
3329         }
3330
3331         cfg->code_len = code - cfg->native_code;
3332
3333         return code;
3334 }
3335
3336 void
3337 mono_arch_emit_epilog (MonoCompile *cfg)
3338 {
3339         MonoMethod *method = cfg->method;
3340         MonoMethodSignature *sig = mono_method_signature (method);
3341         int quad, pos;
3342         guint32 stack_to_pop;
3343         guint8 *code;
3344         int max_epilog_size = 16;
3345         CallInfo *cinfo;
3346         
3347         if (cfg->method->save_lmf)
3348                 max_epilog_size += 128;
3349         
3350         if (mono_jit_trace_calls != NULL)
3351                 max_epilog_size += 50;
3352
3353         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3354                 cfg->code_size *= 2;
3355                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3356                 mono_jit_stats.code_reallocs++;
3357         }
3358
3359         code = cfg->native_code + cfg->code_len;
3360
3361         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3362                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3363
3364         /* the code restoring the registers must be kept in sync with CEE_JMP */
3365         pos = 0;
3366         
3367         if (method->save_lmf) {
3368                 gint32 prev_lmf_reg;
3369                 gint32 lmf_offset = -sizeof (MonoLMF);
3370
3371                 /* Find a spare register */
3372                 switch (sig->ret->type) {
3373                 case MONO_TYPE_I8:
3374                 case MONO_TYPE_U8:
3375                         prev_lmf_reg = X86_EDI;
3376                         cfg->used_int_regs |= (1 << X86_EDI);
3377                         break;
3378                 default:
3379                         prev_lmf_reg = X86_EDX;
3380                         break;
3381                 }
3382
3383                 /* reg = previous_lmf */
3384                 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
3385
3386                 /* ecx = lmf */
3387                 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
3388
3389                 /* *(lmf) = previous_lmf */
3390                 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3391
3392                 /* restore caller saved regs */
3393                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3394                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
3395                 }
3396
3397                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3398                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
3399                 }
3400                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3401                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
3402                 }
3403
3404                 /* EBP is restored by LEAVE */
3405         } else {
3406                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3407                         pos -= 4;
3408                 }
3409                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3410                         pos -= 4;
3411                 }
3412                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3413                         pos -= 4;
3414                 }
3415
3416                 if (pos)
3417                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3418
3419                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3420                         x86_pop_reg (code, X86_ESI);
3421                 }
3422                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3423                         x86_pop_reg (code, X86_EDI);
3424                 }
3425                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3426                         x86_pop_reg (code, X86_EBX);
3427                 }
3428         }
3429
3430         /* Load returned vtypes into registers if needed */
3431         cinfo = get_call_info (sig, FALSE);
3432         if (cinfo->ret.storage == ArgValuetypeInReg) {
3433                 for (quad = 0; quad < 2; quad ++) {
3434                         switch (cinfo->ret.pair_storage [quad]) {
3435                         case ArgInIReg:
3436                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
3437                                 break;
3438                         case ArgOnFloatFpStack:
3439                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
3440                                 break;
3441                         case ArgOnDoubleFpStack:
3442                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
3443                                 break;
3444                         case ArgNone:
3445                                 break;
3446                         default:
3447                                 g_assert_not_reached ();
3448                         }
3449                 }
3450         }
3451
3452         x86_leave (code);
3453
3454         if (CALLCONV_IS_STDCALL (sig)) {
3455                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3456
3457                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3458         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
3459                 stack_to_pop = 4;
3460         else
3461                 stack_to_pop = 0;
3462
3463         if (stack_to_pop)
3464                 x86_ret_imm (code, stack_to_pop);
3465         else
3466                 x86_ret (code);
3467
3468         g_free (cinfo);
3469
3470         cfg->code_len = code - cfg->native_code;
3471
3472         g_assert (cfg->code_len < cfg->code_size);
3473 }
3474
3475 void
3476 mono_arch_emit_exceptions (MonoCompile *cfg)
3477 {
3478         MonoJumpInfo *patch_info;
3479         int nthrows, i;
3480         guint8 *code;
3481         MonoClass *exc_classes [16];
3482         guint8 *exc_throw_start [16], *exc_throw_end [16];
3483         guint32 code_size;
3484         int exc_count = 0;
3485
3486         /* Compute needed space */
3487         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3488                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3489                         exc_count++;
3490         }
3491
3492         /* 
3493          * make sure we have enough space for exceptions
3494          * 16 is the size of two push_imm instructions and a call
3495          */
3496         if (cfg->compile_aot)
3497                 code_size = exc_count * 32;
3498         else
3499                 code_size = exc_count * 16;
3500
3501         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
3502                 cfg->code_size *= 2;
3503                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3504                 mono_jit_stats.code_reallocs++;
3505         }
3506
3507         code = cfg->native_code + cfg->code_len;
3508
3509         nthrows = 0;
3510         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3511                 switch (patch_info->type) {
3512                 case MONO_PATCH_INFO_EXC: {
3513                         MonoClass *exc_class;
3514                         guint8 *buf, *buf2;
3515                         guint32 throw_ip;
3516
3517                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3518
3519                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
3520                         g_assert (exc_class);
3521                         throw_ip = patch_info->ip.i;
3522
3523                         /* Find a throw sequence for the same exception class */
3524                         for (i = 0; i < nthrows; ++i)
3525                                 if (exc_classes [i] == exc_class)
3526                                         break;
3527                         if (i < nthrows) {
3528                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
3529                                 x86_jump_code (code, exc_throw_start [i]);
3530                                 patch_info->type = MONO_PATCH_INFO_NONE;
3531                         }
3532                         else {
3533                                 guint32 got_reg = X86_EAX;
3534                                 guint32 size;
3535
3536                                 /* Compute size of code following the push <OFFSET> */
3537                                 if (cfg->compile_aot) {
3538                                         size = 5 + 6;
3539                                         if (!cfg->got_var)
3540                                                 size += 32;
3541                                         else if (cfg->got_var->opcode == OP_REGOFFSET)
3542                                                 size += 6;
3543                                 }
3544                                 else
3545                                         size = 5 + 5;
3546
3547                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
3548                                         /* Use the shorter form */
3549                                         buf = buf2 = code;
3550                                         x86_push_imm (code, 0);
3551                                 }
3552                                 else {
3553                                         buf = code;
3554                                         x86_push_imm (code, 0xf0f0f0f0);
3555                                         buf2 = code;
3556                                 }
3557
3558                                 if (nthrows < 16) {
3559                                         exc_classes [nthrows] = exc_class;
3560                                         exc_throw_start [nthrows] = code;
3561                                 }
3562
3563                                 if (cfg->compile_aot) {          
3564                                         /*
3565                                          * Since the patches are generated by the back end, there is                                     * no way to generate a got_var at this point.   
3566                                          */
3567                                         if (!cfg->got_var) {
3568                                                 x86_call_imm (code, 0);
3569                                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3570                                                 x86_pop_reg (code, X86_EAX);
3571                                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
3572                                         }
3573                                         else {
3574                                                 if (cfg->got_var->opcode == OP_REGOFFSET)
3575                                                         x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
3576                                                 else
3577                                                         got_reg = cfg->got_var->dreg;
3578                                         }
3579                                 }
3580
3581                                 x86_push_imm (code, exc_class->type_token);
3582                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
3583                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
3584                                 patch_info->ip.i = code - cfg->native_code;
3585                                 if (cfg->compile_aot)
3586                                         x86_call_membase (code, got_reg, 0xf0f0f0f0);
3587                                 else
3588                                         x86_call_code (code, 0);
3589                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
3590                                 while (buf < buf2)
3591                                         x86_nop (buf);
3592
3593                                 if (nthrows < 16) {
3594                                         exc_throw_end [nthrows] = code;
3595                                         nthrows ++;
3596                                 }
3597                         }
3598                         break;
3599                 }
3600                 default:
3601                         /* do nothing */
3602                         break;
3603                 }
3604         }
3605
3606         cfg->code_len = code - cfg->native_code;
3607
3608         g_assert (cfg->code_len < cfg->code_size);
3609 }
3610
3611 void
3612 mono_arch_flush_icache (guint8 *code, gint size)
3613 {
3614         /* not needed */
3615 }
3616
3617 void
3618 mono_arch_flush_register_windows (void)
3619 {
3620 }
3621
3622 /*
3623  * Support for fast access to the thread-local lmf structure using the GS
3624  * segment register on NPTL + kernel 2.6.x.
3625  */
3626
3627 static gboolean tls_offset_inited = FALSE;
3628
3629 void
3630 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
3631 {
3632         if (!tls_offset_inited) {
3633                 if (!getenv ("MONO_NO_TLS")) {
3634 #ifdef PLATFORM_WIN32
3635                         /* 
3636                          * We need to init this multiple times, since when we are first called, the key might not
3637                          * be initialized yet.
3638                          */
3639                         appdomain_tls_offset = mono_domain_get_tls_key ();
3640                         lmf_tls_offset = mono_get_jit_tls_key ();
3641                         thread_tls_offset = mono_thread_get_tls_key ();
3642
3643                         /* Only 64 tls entries can be accessed using inline code */
3644                         if (appdomain_tls_offset >= 64)
3645                                 appdomain_tls_offset = -1;
3646                         if (lmf_tls_offset >= 64)
3647                                 lmf_tls_offset = -1;
3648                         if (thread_tls_offset >= 64)
3649                                 thread_tls_offset = -1;
3650 #else
3651                         tls_offset_inited = TRUE;
3652                         appdomain_tls_offset = mono_domain_get_tls_offset ();
3653                         lmf_tls_offset = mono_get_lmf_tls_offset ();
3654                         thread_tls_offset = mono_thread_get_tls_offset ();
3655 #endif
3656                 }
3657         }               
3658 }
3659
3660 void
3661 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
3662 {
3663 }
3664
3665 void
3666 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3667 {
3668         MonoCallInst *call = (MonoCallInst*)inst;
3669         CallInfo *cinfo = get_call_info (inst->signature, FALSE);
3670
3671         /* add the this argument */
3672         if (this_reg != -1) {
3673                 if (cinfo->args [0].storage == ArgInIReg) {
3674                         MonoInst *this;
3675                         MONO_INST_NEW (cfg, this, OP_MOVE);
3676                         this->type = this_type;
3677                         this->sreg1 = this_reg;
3678                         this->dreg = mono_regstate_next_int (cfg->rs);
3679                         mono_bblock_add_inst (cfg->cbb, this);
3680
3681                         mono_call_inst_add_outarg_reg (call, this->dreg, cinfo->args [0].reg, FALSE);
3682                 }
3683                 else {
3684                         MonoInst *this;
3685                         MONO_INST_NEW (cfg, this, OP_OUTARG);
3686                         this->type = this_type;
3687                         this->sreg1 = this_reg;
3688                         mono_bblock_add_inst (cfg->cbb, this);
3689                 }
3690         }
3691
3692         if (vt_reg != -1) {
3693                 MonoInst *vtarg;
3694
3695                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3696                         /*
3697                          * The valuetype is in EAX:EDX after the call, needs to be copied to
3698                          * the stack. Save the address here, so the call instruction can
3699                          * access it.
3700                          */
3701                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
3702                         vtarg->inst_destbasereg = X86_ESP;
3703                         vtarg->inst_offset = inst->stack_usage;
3704                         vtarg->sreg1 = vt_reg;
3705                         mono_bblock_add_inst (cfg->cbb, vtarg);
3706                 }
3707                 else if (cinfo->ret.storage == ArgInIReg) {
3708                         /* The return address is passed in a register */
3709                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
3710                         vtarg->sreg1 = vt_reg;
3711                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
3712                         mono_bblock_add_inst (cfg->cbb, vtarg);
3713
3714                         mono_call_inst_add_outarg_reg (call, vtarg->dreg, cinfo->ret.reg, FALSE);
3715                 } else {
3716                         MonoInst *vtarg;
3717                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3718                         vtarg->type = STACK_MP;
3719                         vtarg->sreg1 = vt_reg;
3720                         mono_bblock_add_inst (cfg->cbb, vtarg);
3721                 }
3722         }
3723
3724         g_free (cinfo);
3725 }
3726
3727 MonoInst*
3728 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3729 {
3730         MonoInst *ins = NULL;
3731
3732         if (cmethod->klass == mono_defaults.math_class) {
3733                 if (strcmp (cmethod->name, "Sin") == 0) {
3734                         MONO_INST_NEW (cfg, ins, OP_SIN);
3735                         ins->inst_i0 = args [0];
3736                 } else if (strcmp (cmethod->name, "Cos") == 0) {
3737                         MONO_INST_NEW (cfg, ins, OP_COS);
3738                         ins->inst_i0 = args [0];
3739                 } else if (strcmp (cmethod->name, "Tan") == 0) {
3740                         MONO_INST_NEW (cfg, ins, OP_TAN);
3741                         ins->inst_i0 = args [0];
3742                 } else if (strcmp (cmethod->name, "Atan") == 0) {
3743                         MONO_INST_NEW (cfg, ins, OP_ATAN);
3744                         ins->inst_i0 = args [0];
3745                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
3746                         MONO_INST_NEW (cfg, ins, OP_SQRT);
3747                         ins->inst_i0 = args [0];
3748                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
3749                         MONO_INST_NEW (cfg, ins, OP_ABS);
3750                         ins->inst_i0 = args [0];
3751                 }
3752 #if 0
3753                 /* OP_FREM is not IEEE compatible */
3754                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
3755                         MONO_INST_NEW (cfg, ins, OP_FREM);
3756                         ins->inst_i0 = args [0];
3757                         ins->inst_i1 = args [1];
3758                 }
3759 #endif
3760         } else if (cmethod->klass == mono_defaults.thread_class &&
3761                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
3762                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
3763         } else if(cmethod->klass->image == mono_defaults.corlib &&
3764                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
3765                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
3766
3767                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3768                         MonoInst *ins_iconst;
3769
3770                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3771                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3772                         ins_iconst->inst_c0 = 1;
3773
3774                         ins->inst_i0 = args [0];
3775                         ins->inst_i1 = ins_iconst;
3776                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3777                         MonoInst *ins_iconst;
3778
3779                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3780                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3781                         ins_iconst->inst_c0 = -1;
3782
3783                         ins->inst_i0 = args [0];
3784                         ins->inst_i1 = ins_iconst;
3785                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3786                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
3787
3788                         ins->inst_i0 = args [0];
3789                         ins->inst_i1 = args [1];
3790                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3791                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
3792
3793                         ins->inst_i0 = args [0];
3794                         ins->inst_i1 = args [1];
3795                 }
3796         }
3797
3798         return ins;
3799 }
3800
3801
3802 gboolean
3803 mono_arch_print_tree (MonoInst *tree, int arity)
3804 {
3805         return 0;
3806 }
3807
3808 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3809 {
3810         MonoInst* ins;
3811         
3812         if (appdomain_tls_offset == -1)
3813                 return NULL;
3814
3815         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3816         ins->inst_offset = appdomain_tls_offset;
3817         return ins;
3818 }
3819
3820 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3821 {
3822         MonoInst* ins;
3823
3824         if (thread_tls_offset == -1)
3825                 return NULL;
3826
3827         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3828         ins->inst_offset = thread_tls_offset;
3829         return ins;
3830 }
3831
3832 guint32
3833 mono_arch_get_patch_offset (guint8 *code)
3834 {
3835         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3836                 return 2;
3837         else if ((code [0] == 0xba))
3838                 return 1;
3839         else if ((code [0] == 0x68))
3840                 /* push IMM */
3841                 return 1;
3842         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3843                 /* push <OFFSET>(<REG>) */
3844                 return 2;
3845         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
3846                 /* call *<OFFSET>(<REG>) */
3847                 return 2;
3848         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
3849                 /* fldl <ADDR> */
3850                 return 2;
3851         else if ((code [0] == 0x58) && (code [1] == 0x05))
3852                 /* pop %eax; add <OFFSET>, %eax */
3853                 return 2;
3854         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
3855                 /* pop <REG>; add <OFFSET>, <REG> */
3856                 return 3;
3857         else {
3858                 g_assert_not_reached ();
3859                 return -1;
3860         }
3861 }
3862
3863 gpointer*
3864 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
3865 {
3866         guint8 reg = 0;
3867         gint32 disp = 0;
3868
3869         /* go to the start of the call instruction
3870          *
3871          * address_byte = (m << 6) | (o << 3) | reg
3872          * call opcode: 0xff address_byte displacement
3873          * 0xff m=1,o=2 imm8
3874          * 0xff m=2,o=2 imm32
3875          */
3876         code -= 6;
3877         if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
3878                 reg = code [4] & 0x07;
3879                 disp = (signed char)code [5];
3880         } else {
3881                 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
3882                         reg = code [1] & 0x07;
3883                         disp = *((gint32*)(code + 2));
3884                 } else if ((code [1] == 0xe8)) {
3885                         return NULL;
3886                 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
3887                         /*
3888                          * This is a interface call: should check the above code can't catch it earlier 
3889                          * 8b 40 30   mov    0x30(%eax),%eax
3890                          * ff 10      call   *(%eax)
3891                          */
3892                         disp = 0;
3893                         reg = code [5] & 0x07;
3894                 }
3895                 else
3896                         return NULL;
3897         }
3898
3899         return (gpointer*)(((gint32)(regs [reg])) + disp);
3900 }
3901
3902 gpointer* 
3903 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
3904 {
3905         guint8 reg = 0;
3906         gint32 disp = 0;
3907
3908         code -= 7;
3909         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
3910                 reg = x86_modrm_rm (code [1]);
3911                 disp = code [4];
3912
3913                 if (reg == X86_EAX)
3914                         return NULL;
3915                 else
3916                         return (gpointer*)(((gint32)(regs [reg])) + disp);
3917         }
3918
3919         return NULL;
3920 }