2005-03-29 Zoltan Varga <vargaz@freemail.hu>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #include <mono/metadata/appdomain.h>
16 #include <mono/metadata/debug-helpers.h>
17 #include <mono/metadata/threads.h>
18 #include <mono/metadata/profiler-private.h>
19 #include <mono/utils/mono-math.h>
20
21 #include "trace.h"
22 #include "mini-x86.h"
23 #include "inssel.h"
24 #include "cpu-pentium.h"
25
26 /* On windows, these hold the key returned by TlsAlloc () */
27 static gint lmf_tls_offset = -1;
28 static gint appdomain_tls_offset = -1;
29 static gint thread_tls_offset = -1;
30
31 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
32
33 #ifdef PLATFORM_WIN32
34 /* Under windows, the default pinvoke calling convention is stdcall */
35 #define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
36 #else
37 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
38 #endif
39
40 #define SIGNAL_STACK_SIZE (64 * 1024)
41
42 #define NOT_IMPLEMENTED g_assert_not_reached ()
43
44 const char*
45 mono_arch_regname (int reg) {
46         switch (reg) {
47         case X86_EAX: return "%eax";
48         case X86_EBX: return "%ebx";
49         case X86_ECX: return "%ecx";
50         case X86_EDX: return "%edx";
51         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
52         case X86_EDI: return "%edi";
53         case X86_ESI: return "%esi";
54         }
55         return "unknown";
56 }
57
/*
 * Describes where an argument or a return value lives for a call.
 */
typedef enum {
	/* In an integer register (ArgInfo.reg) */
	ArgInIReg,
	/* Single precision value in a float register; only chosen by add_float
	 * when FLOAT_PARAM_REGS is non-zero */
	ArgInFloatSSEReg,
	/* Double precision value in a float register; same restriction */
	ArgInDoubleSSEReg,
	/* On the stack, at ArgInfo.offset */
	ArgOnStack,
	/* Valuetype described by ArgInfo.pair_storage/pair_regs */
	ArgValuetypeInReg,
	/* R4 return value */
	ArgOnFloatFpStack,
	/* R8 return value */
	ArgOnDoubleFpStack,
	/* No value (void return) or unused pair slot */
	ArgNone
} ArgStorage;
68
69 typedef struct {
70         gint16 offset;
71         gint8  reg;
72         ArgStorage storage;
73
74         /* Only if storage == ArgValuetypeInReg */
75         ArgStorage pair_storage [2];
76         gint8 pair_regs [2];
77 } ArgInfo;
78
79 typedef struct {
80         int nargs;
81         guint32 stack_usage;
82         guint32 reg_usage;
83         guint32 freg_usage;
84         gboolean need_stack_align;
85         ArgInfo ret;
86         ArgInfo sig_cookie;
87         ArgInfo args [1];
88 } CallInfo;
89
90 #define PARAM_REGS 0
91
92 #define FLOAT_PARAM_REGS 0
93
94 static X86_Reg_No param_regs [] = { 0 };
95
96 #ifdef PLATFORM_WIN32
97 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
98 #endif
99
100 static void inline
101 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
102 {
103     ainfo->offset = *stack_size;
104
105     if (*gr >= PARAM_REGS) {
106                 ainfo->storage = ArgOnStack;
107                 (*stack_size) += sizeof (gpointer);
108     }
109     else {
110                 ainfo->storage = ArgInIReg;
111                 ainfo->reg = param_regs [*gr];
112                 (*gr) ++;
113     }
114 }
115
116 static void inline
117 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
118 {
119         ainfo->offset = *stack_size;
120
121         g_assert (PARAM_REGS == 0);
122         
123         ainfo->storage = ArgOnStack;
124         (*stack_size) += sizeof (gpointer) * 2;
125 }
126
127 static void inline
128 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
129 {
130     ainfo->offset = *stack_size;
131
132     if (*gr >= FLOAT_PARAM_REGS) {
133                 ainfo->storage = ArgOnStack;
134                 (*stack_size) += sizeof (gpointer);
135     }
136     else {
137                 /* A double register */
138                 if (is_double)
139                         ainfo->storage = ArgInDoubleSSEReg;
140                 else
141                         ainfo->storage = ArgInFloatSSEReg;
142                 ainfo->reg = *gr;
143                 (*gr) += 1;
144     }
145 }
146
147
148 static void
149 add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
150                gboolean is_return,
151                guint32 *gr, guint32 *fr, guint32 *stack_size)
152 {
153         guint32 size;
154         MonoClass *klass;
155
156         klass = mono_class_from_mono_type (type);
157         if (sig->pinvoke) 
158                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
159         else 
160                 size = mono_type_stack_size (&klass->byval_arg, NULL);
161
162 #ifdef PLATFORM_WIN32
163         if (sig->pinvoke && is_return) {
164                 MonoMarshalType *info;
165
166                 /*
167                  * the exact rules are not very well documented, the code below seems to work with the 
168                  * code generated by gcc 3.3.3 -mno-cygwin.
169                  */
170                 info = mono_marshal_load_type_info (klass);
171                 g_assert (info);
172
173                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
174
175                 /* Special case structs with only a float member */
176                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
177                         ainfo->storage = ArgValuetypeInReg;
178                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
179                         return;
180                 }
181                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
182                         ainfo->storage = ArgValuetypeInReg;
183                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
184                         return;
185                 }               
186                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
187                         ainfo->storage = ArgValuetypeInReg;
188                         ainfo->pair_storage [0] = ArgInIReg;
189                         ainfo->pair_regs [0] = return_regs [0];
190                         if (info->native_size > 4) {
191                                 ainfo->pair_storage [1] = ArgInIReg;
192                                 ainfo->pair_regs [1] = return_regs [1];
193                         }
194                         return;
195                 }
196         }
197 #endif
198
199         ainfo->offset = *stack_size;
200         ainfo->storage = ArgOnStack;
201         *stack_size += ALIGN_TO (size, sizeof (gpointer));
202 }
203
204 /*
205  * get_call_info:
206  *
207  *  Obtain information about a call according to the calling convention.
208  * For x86 ELF, see the "System V Application Binary Interface Intel386 
209  * Architecture Processor Supplment, Fourth Edition" document for more
210  * information.
211  * For x86 win32, see ???.
212  */
213 static CallInfo*
214 get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
215 {
216         guint32 i, gr, fr;
217         MonoType *ret_type;
218         int n = sig->hasthis + sig->param_count;
219         guint32 stack_size = 0;
220         CallInfo *cinfo;
221
222         cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
223
224         gr = 0;
225         fr = 0;
226
227         /* return value */
228         {
229                 ret_type = mono_type_get_underlying_type (sig->ret);
230                 switch (ret_type->type) {
231                 case MONO_TYPE_BOOLEAN:
232                 case MONO_TYPE_I1:
233                 case MONO_TYPE_U1:
234                 case MONO_TYPE_I2:
235                 case MONO_TYPE_U2:
236                 case MONO_TYPE_CHAR:
237                 case MONO_TYPE_I4:
238                 case MONO_TYPE_U4:
239                 case MONO_TYPE_I:
240                 case MONO_TYPE_U:
241                 case MONO_TYPE_PTR:
242                 case MONO_TYPE_FNPTR:
243                 case MONO_TYPE_CLASS:
244                 case MONO_TYPE_OBJECT:
245                 case MONO_TYPE_SZARRAY:
246                 case MONO_TYPE_ARRAY:
247                 case MONO_TYPE_STRING:
248                         cinfo->ret.storage = ArgInIReg;
249                         cinfo->ret.reg = X86_EAX;
250                         break;
251                 case MONO_TYPE_U8:
252                 case MONO_TYPE_I8:
253                         cinfo->ret.storage = ArgInIReg;
254                         cinfo->ret.reg = X86_EAX;
255                         break;
256                 case MONO_TYPE_R4:
257                         cinfo->ret.storage = ArgOnFloatFpStack;
258                         break;
259                 case MONO_TYPE_R8:
260                         cinfo->ret.storage = ArgOnDoubleFpStack;
261                         break;
262                 case MONO_TYPE_VALUETYPE: {
263                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
264
265                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
266                         if (cinfo->ret.storage == ArgOnStack)
267                                 /* The caller passes the address where the value is stored */
268                                 add_general (&gr, &stack_size, &cinfo->ret);
269                         break;
270                 }
271                 case MONO_TYPE_TYPEDBYREF:
272                         /* Same as a valuetype with size 24 */
273                         add_general (&gr, &stack_size, &cinfo->ret);
274                         ;
275                         break;
276                 case MONO_TYPE_VOID:
277                         cinfo->ret.storage = ArgNone;
278                         break;
279                 default:
280                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
281                 }
282         }
283
284         /* this */
285         if (sig->hasthis)
286                 add_general (&gr, &stack_size, cinfo->args + 0);
287
288         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
289                 gr = PARAM_REGS;
290                 fr = FLOAT_PARAM_REGS;
291                 
292                 /* Emit the signature cookie just before the implicit arguments */
293                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
294         }
295
296         for (i = 0; i < sig->param_count; ++i) {
297                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
298                 MonoType *ptype;
299
300                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
301                         /* We allways pass the sig cookie on the stack for simplicity */
302                         /* 
303                          * Prevent implicit arguments + the sig cookie from being passed 
304                          * in registers.
305                          */
306                         gr = PARAM_REGS;
307                         fr = FLOAT_PARAM_REGS;
308
309                         /* Emit the signature cookie just before the implicit arguments */
310                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
311                 }
312
313                 if (sig->params [i]->byref) {
314                         add_general (&gr, &stack_size, ainfo);
315                         continue;
316                 }
317                 ptype = mono_type_get_underlying_type (sig->params [i]);
318                 switch (ptype->type) {
319                 case MONO_TYPE_BOOLEAN:
320                 case MONO_TYPE_I1:
321                 case MONO_TYPE_U1:
322                         add_general (&gr, &stack_size, ainfo);
323                         break;
324                 case MONO_TYPE_I2:
325                 case MONO_TYPE_U2:
326                 case MONO_TYPE_CHAR:
327                         add_general (&gr, &stack_size, ainfo);
328                         break;
329                 case MONO_TYPE_I4:
330                 case MONO_TYPE_U4:
331                         add_general (&gr, &stack_size, ainfo);
332                         break;
333                 case MONO_TYPE_I:
334                 case MONO_TYPE_U:
335                 case MONO_TYPE_PTR:
336                 case MONO_TYPE_FNPTR:
337                 case MONO_TYPE_CLASS:
338                 case MONO_TYPE_OBJECT:
339                 case MONO_TYPE_STRING:
340                 case MONO_TYPE_SZARRAY:
341                 case MONO_TYPE_ARRAY:
342                         add_general (&gr, &stack_size, ainfo);
343                         break;
344                 case MONO_TYPE_VALUETYPE:
345                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
346                         break;
347                 case MONO_TYPE_TYPEDBYREF:
348                         stack_size += sizeof (MonoTypedRef);
349                         ainfo->storage = ArgOnStack;
350                         break;
351                 case MONO_TYPE_U8:
352                 case MONO_TYPE_I8:
353                         add_general_pair (&gr, &stack_size, ainfo);
354                         break;
355                 case MONO_TYPE_R4:
356                         add_float (&fr, &stack_size, ainfo, FALSE);
357                         break;
358                 case MONO_TYPE_R8:
359                         add_float (&fr, &stack_size, ainfo, TRUE);
360                         break;
361                 default:
362                         g_error ("unexpected type 0x%x", ptype->type);
363                         g_assert_not_reached ();
364                 }
365         }
366
367         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
368                 gr = PARAM_REGS;
369                 fr = FLOAT_PARAM_REGS;
370                 
371                 /* Emit the signature cookie just before the implicit arguments */
372                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
373         }
374
375         cinfo->stack_usage = stack_size;
376         cinfo->reg_usage = gr;
377         cinfo->freg_usage = fr;
378         return cinfo;
379 }
380
381 /*
382  * mono_arch_get_argument_info:
383  * @csig:  a method signature
384  * @param_count: the number of parameters to consider
385  * @arg_info: an array to store the result infos
386  *
387  * Gathers information on parameters such as size, alignment and
388  * padding. arg_info should be large enought to hold param_count + 1 entries. 
389  *
390  * Returns the size of the activation frame.
391  */
392 int
393 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
394 {
395         int k, frame_size = 0;
396         int size, align, pad;
397         int offset = 8;
398         CallInfo *cinfo;
399
400         cinfo = get_call_info (csig, FALSE);
401
402         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
403                 frame_size += sizeof (gpointer);
404                 offset += 4;
405         }
406
407         arg_info [0].offset = offset;
408
409         if (csig->hasthis) {
410                 frame_size += sizeof (gpointer);
411                 offset += 4;
412         }
413
414         arg_info [0].size = frame_size;
415
416         for (k = 0; k < param_count; k++) {
417                 
418                 if (csig->pinvoke)
419                         size = mono_type_native_stack_size (csig->params [k], &align);
420                 else
421                         size = mono_type_stack_size (csig->params [k], &align);
422
423                 /* ignore alignment for now */
424                 align = 1;
425
426                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
427                 arg_info [k].pad = pad;
428                 frame_size += size;
429                 arg_info [k + 1].pad = 0;
430                 arg_info [k + 1].size = size;
431                 offset += pad;
432                 arg_info [k + 1].offset = offset;
433                 offset += size;
434         }
435
436         align = MONO_ARCH_FRAME_ALIGNMENT;
437         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
438         arg_info [k].pad = pad;
439
440         g_free (cinfo);
441
442         return frame_size;
443 }
444
445 static const guchar cpuid_impl [] = {
446         0x55,                           /* push   %ebp */
447         0x89, 0xe5,                     /* mov    %esp,%ebp */
448         0x53,                           /* push   %ebx */
449         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
450         0x0f, 0xa2,                     /* cpuid   */
451         0x50,                           /* push   %eax */
452         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
453         0x89, 0x18,                     /* mov    %ebx,(%eax) */
454         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
455         0x89, 0x08,                     /* mov    %ecx,(%eax) */
456         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
457         0x89, 0x10,                     /* mov    %edx,(%eax) */
458         0x58,                           /* pop    %eax */
459         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
460         0x89, 0x02,                     /* mov    %eax,(%edx) */
461         0x5b,                           /* pop    %ebx */
462         0xc9,                           /* leave   */
463         0xc3,                           /* ret     */
464 };
465
466 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
467
468 static int 
469 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
470 {
471         int have_cpuid = 0;
472 #ifndef _MSC_VER
473         __asm__  __volatile__ (
474                 "pushfl\n"
475                 "popl %%eax\n"
476                 "movl %%eax, %%edx\n"
477                 "xorl $0x200000, %%eax\n"
478                 "pushl %%eax\n"
479                 "popfl\n"
480                 "pushfl\n"
481                 "popl %%eax\n"
482                 "xorl %%edx, %%eax\n"
483                 "andl $0x200000, %%eax\n"
484                 "movl %%eax, %0"
485                 : "=r" (have_cpuid)
486                 :
487                 : "%eax", "%edx"
488         );
489 #else
490         __asm {
491                 pushfd
492                 pop eax
493                 mov edx, eax
494                 xor eax, 0x200000
495                 push eax
496                 popfd
497                 pushfd
498                 pop eax
499                 xor eax, edx
500                 and eax, 0x200000
501                 mov have_cpuid, eax
502         }
503 #endif
504         if (have_cpuid) {
505                 /* Have to use the code manager to get around WinXP DEP */
506                 MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
507                 CpuidFunc func;
508                 void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
509                 memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
510
511                 func = (CpuidFunc)ptr;
512                 func (id, p_eax, p_ebx, p_ecx, p_edx);
513
514                 mono_code_manager_destroy (codeman);
515
516                 /*
517                  * We use this approach because of issues with gcc and pic code, see:
518                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
519                 __asm__ __volatile__ ("cpuid"
520                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
521                         : "a" (id));
522                 */
523                 return 1;
524         }
525         return 0;
526 }
527
528 /*
529  * Initialize the cpu to execute managed code.
530  */
531 void
532 mono_arch_cpu_init (void)
533 {
534         /* spec compliance requires running with double precision */
535 #ifndef _MSC_VER
536         guint16 fpcw;
537
538         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
539         fpcw &= ~X86_FPCW_PRECC_MASK;
540         fpcw |= X86_FPCW_PREC_DOUBLE;
541         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
542         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
543 #else
544         _control87 (_PC_64, MCW_PC);
545 #endif
546 }
547
548 /*
549  * This function returns the optimizations supported on this cpu.
550  */
551 guint32
552 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
553 {
554         int eax, ebx, ecx, edx;
555         guint32 opts = 0;
556         
557         *exclude_mask = 0;
558         /* Feature Flags function, flags returned in EDX. */
559         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
560                 if (edx & (1 << 15)) {
561                         opts |= MONO_OPT_CMOV;
562                         if (edx & 1)
563                                 opts |= MONO_OPT_FCMOV;
564                         else
565                                 *exclude_mask |= MONO_OPT_FCMOV;
566                 } else
567                         *exclude_mask |= MONO_OPT_CMOV;
568         }
569         return opts;
570 }
571
572 /*
573  * Determine whenever the trap whose info is in SIGINFO is caused by
574  * integer overflow.
575  */
576 gboolean
577 mono_arch_is_int_overflow (void *sigctx, void *info)
578 {
579         struct sigcontext *ctx = (struct sigcontext*)sigctx;
580         guint8* ip;
581
582         ip = (guint8*)ctx->SC_EIP;
583
584         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
585                 gint32 reg;
586
587                 /* idiv REG */
588                 switch (x86_modrm_rm (ip [1])) {
589                 case X86_ECX:
590                         reg = ctx->SC_ECX;
591                         break;
592                 case X86_EBX:
593                         reg = ctx->SC_EBX;
594                         break;
595                 default:
596                         g_assert_not_reached ();
597                         reg = -1;
598                 }
599
600                 if (reg == -1)
601                         return TRUE;
602         }
603                         
604         return FALSE;
605 }
606
607 static gboolean
608 is_regsize_var (MonoType *t) {
609         if (t->byref)
610                 return TRUE;
611         switch (mono_type_get_underlying_type (t)->type) {
612         case MONO_TYPE_I4:
613         case MONO_TYPE_U4:
614         case MONO_TYPE_I:
615         case MONO_TYPE_U:
616         case MONO_TYPE_PTR:
617         case MONO_TYPE_FNPTR:
618                 return TRUE;
619         case MONO_TYPE_OBJECT:
620         case MONO_TYPE_STRING:
621         case MONO_TYPE_CLASS:
622         case MONO_TYPE_SZARRAY:
623         case MONO_TYPE_ARRAY:
624                 return TRUE;
625         case MONO_TYPE_VALUETYPE:
626                 return FALSE;
627         }
628         return FALSE;
629 }
630
631 GList *
632 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
633 {
634         GList *vars = NULL;
635         int i;
636
637         for (i = 0; i < cfg->num_varinfo; i++) {
638                 MonoInst *ins = cfg->varinfo [i];
639                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
640
641                 /* unused vars */
642                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
643                         continue;
644
645                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
646                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
647                         continue;
648
649                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
650                  * 8bit quantities in caller saved registers on x86 */
651                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
652                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
653                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
654                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
655                         g_assert (i == vmv->idx);
656                         vars = g_list_prepend (vars, vmv);
657                 }
658         }
659
660         vars = mono_varlist_sort (cfg, vars, 0);
661
662         return vars;
663 }
664
665 GList *
666 mono_arch_get_global_int_regs (MonoCompile *cfg)
667 {
668         GList *regs = NULL;
669
670         /* we can use 3 registers for global allocation */
671         regs = g_list_prepend (regs, (gpointer)X86_EBX);
672         regs = g_list_prepend (regs, (gpointer)X86_ESI);
673         regs = g_list_prepend (regs, (gpointer)X86_EDI);
674
675         return regs;
676 }
677
678 /*
679  * mono_arch_regalloc_cost:
680  *
681  *  Return the cost, in number of memory references, of the action of 
682  * allocating the variable VMV into a register during global register
683  * allocation.
684  */
685 guint32
686 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
687 {
688         MonoInst *ins = cfg->varinfo [vmv->idx];
689
690         if (cfg->method->save_lmf)
691                 /* The register is already saved */
692                 return (ins->opcode == OP_ARG) ? 1 : 0;
693         else
694                 /* push+pop+possible load if it is an argument */
695                 return (ins->opcode == OP_ARG) ? 3 : 2;
696 }
697  
698 /*
699  * Set var information according to the calling convention. X86 version.
700  * The locals var stuff should most likely be split in another method.
701  */
702 void
703 mono_arch_allocate_vars (MonoCompile *m)
704 {
705         MonoMethodSignature *sig;
706         MonoMethodHeader *header;
707         MonoInst *inst;
708         guint32 locals_stack_size, locals_stack_align;
709         int i, offset, curinst, size, align;
710         gint32 *offsets;
711         CallInfo *cinfo;
712
713         header = mono_method_get_header (m->method);
714         sig = mono_method_signature (m->method);
715
716         offset = 8;
717         curinst = 0;
718
719         cinfo = get_call_info (sig, FALSE);
720
721         switch (cinfo->ret.storage) {
722         case ArgOnStack:
723                 m->ret->opcode = OP_REGOFFSET;
724                 m->ret->inst_basereg = X86_EBP;
725                 m->ret->inst_offset = offset;
726                 offset += sizeof (gpointer);
727                 break;
728         case ArgValuetypeInReg:
729                 break;
730         case ArgInIReg:
731                 m->ret->opcode = OP_REGVAR;
732                 m->ret->inst_c0 = cinfo->ret.reg;
733                 break;
734         case ArgNone:
735         case ArgOnFloatFpStack:
736         case ArgOnDoubleFpStack:
737                 break;
738         default:
739                 g_assert_not_reached ();
740         }
741
742         if (sig->hasthis) {
743                 inst = m->varinfo [curinst];
744                 if (inst->opcode != OP_REGVAR) {
745                         inst->opcode = OP_REGOFFSET;
746                         inst->inst_basereg = X86_EBP;
747                 }
748                 inst->inst_offset = offset;
749                 offset += sizeof (gpointer);
750                 curinst++;
751         }
752
753         if (sig->call_convention == MONO_CALL_VARARG) {
754                 m->sig_cookie = offset;
755                 offset += sizeof (gpointer);
756         }
757
758         for (i = 0; i < sig->param_count; ++i) {
759                 inst = m->varinfo [curinst];
760                 if (inst->opcode != OP_REGVAR) {
761                         inst->opcode = OP_REGOFFSET;
762                         inst->inst_basereg = X86_EBP;
763                 }
764                 inst->inst_offset = offset;
765                 size = mono_type_size (sig->params [i], &align);
766                 size += 4 - 1;
767                 size &= ~(4 - 1);
768                 offset += size;
769                 curinst++;
770         }
771
772         offset = 0;
773
774         /* reserve space to save LMF and caller saved registers */
775
776         if (m->method->save_lmf) {
777                 offset += sizeof (MonoLMF);
778         } else {
779                 if (m->used_int_regs & (1 << X86_EBX)) {
780                         offset += 4;
781                 }
782
783                 if (m->used_int_regs & (1 << X86_EDI)) {
784                         offset += 4;
785                 }
786
787                 if (m->used_int_regs & (1 << X86_ESI)) {
788                         offset += 4;
789                 }
790         }
791
792         switch (cinfo->ret.storage) {
793         case ArgValuetypeInReg:
794                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
795                 offset += 8;
796                 m->ret->opcode = OP_REGOFFSET;
797                 m->ret->inst_basereg = X86_EBP;
798                 m->ret->inst_offset = - offset;
799                 break;
800         default:
801                 break;
802         }
803
804         /* Allocate locals */
805         offsets = mono_allocate_stack_slots (m, &locals_stack_size, &locals_stack_align);
806         if (locals_stack_align) {
807                 offset += (locals_stack_align - 1);
808                 offset &= ~(locals_stack_align - 1);
809         }
810         for (i = m->locals_start; i < m->num_varinfo; i++) {
811                 if (offsets [i] != -1) {
812                         MonoInst *inst = m->varinfo [i];
813                         inst->opcode = OP_REGOFFSET;
814                         inst->inst_basereg = X86_EBP;
815                         inst->inst_offset = - (offset + offsets [i]);
816                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
817                 }
818         }
819         g_free (offsets);
820         offset += locals_stack_size;
821
822         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
823         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
824
825         g_free (cinfo);
826
827         /* change sign? */
828         m->stack_offset = -offset;
829 }
830
831 void
832 mono_arch_create_vars (MonoCompile *cfg)
833 {
834         MonoMethodSignature *sig;
835         CallInfo *cinfo;
836
837         sig = mono_method_signature (cfg->method);
838
839         cinfo = get_call_info (sig, FALSE);
840
841         if (cinfo->ret.storage == ArgValuetypeInReg)
842                 cfg->ret_var_is_local = TRUE;
843
844         g_free (cinfo);
845 }
846
847 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
848  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
849  */
850
851 /* 
852  * take the arguments and generate the arch-specific
853  * instructions to properly call the function in call.
854  * This includes pushing, moving arguments to the right register
855  * etc.
856  * Issue: who does the spilling if needed, and when?
857  */
858 MonoCallInst*
859 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
860         MonoInst *arg, *in;
861         MonoMethodSignature *sig;
862         int i, n, stack_size, type;
863         MonoType *ptype;
864         CallInfo *cinfo;
865
866         stack_size = 0;
867         /* add the vararg cookie before the non-implicit args */
868         if (call->signature->call_convention == MONO_CALL_VARARG) {
869                 MonoInst *sig_arg;
870                 /* FIXME: Add support for signature tokens to AOT */
871                 cfg->disable_aot = TRUE;
872                 MONO_INST_NEW (cfg, arg, OP_OUTARG);
873                 MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
874                 sig_arg->inst_p0 = call->signature;
875                 arg->inst_left = sig_arg;
876                 arg->type = STACK_PTR;
877                 /* prepend, so they get reversed */
878                 arg->next = call->out_args;
879                 call->out_args = arg;
880                 stack_size += sizeof (gpointer);
881         }
882         sig = call->signature;
883         n = sig->param_count + sig->hasthis;
884
885         cinfo = get_call_info (sig, FALSE);
886
887         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
888                 if (cinfo->ret.storage == ArgOnStack)
889                         stack_size += sizeof (gpointer);
890         }
891
892         for (i = 0; i < n; ++i) {
893                 if (is_virtual && i == 0) {
894                         /* the argument will be attached to the call instrucion */
895                         in = call->args [i];
896                         stack_size += 4;
897                 } else {
898                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
899                         in = call->args [i];
900                         arg->cil_code = in->cil_code;
901                         arg->inst_left = in;
902                         arg->type = in->type;
903                         /* prepend, so they get reversed */
904                         arg->next = call->out_args;
905                         call->out_args = arg;
906                         if (i >= sig->hasthis) {
907                                 MonoType *t = sig->params [i - sig->hasthis];
908                                 ptype = mono_type_get_underlying_type (t);
909                                 if (t->byref)
910                                         type = MONO_TYPE_U;
911                                 else
912                                         type = ptype->type;
913                                 /* FIXME: validate arguments... */
914                                 switch (type) {
915                                 case MONO_TYPE_I:
916                                 case MONO_TYPE_U:
917                                 case MONO_TYPE_BOOLEAN:
918                                 case MONO_TYPE_CHAR:
919                                 case MONO_TYPE_I1:
920                                 case MONO_TYPE_U1:
921                                 case MONO_TYPE_I2:
922                                 case MONO_TYPE_U2:
923                                 case MONO_TYPE_I4:
924                                 case MONO_TYPE_U4:
925                                 case MONO_TYPE_STRING:
926                                 case MONO_TYPE_CLASS:
927                                 case MONO_TYPE_OBJECT:
928                                 case MONO_TYPE_PTR:
929                                 case MONO_TYPE_FNPTR:
930                                 case MONO_TYPE_ARRAY:
931                                 case MONO_TYPE_SZARRAY:
932                                         stack_size += 4;
933                                         break;
934                                 case MONO_TYPE_I8:
935                                 case MONO_TYPE_U8:
936                                         stack_size += 8;
937                                         break;
938                                 case MONO_TYPE_R4:
939                                         stack_size += 4;
940                                         arg->opcode = OP_OUTARG_R4;
941                                         break;
942                                 case MONO_TYPE_R8:
943                                         stack_size += 8;
944                                         arg->opcode = OP_OUTARG_R8;
945                                         break;
946                                 case MONO_TYPE_VALUETYPE: {
947                                         int size;
948                                         if (sig->pinvoke) 
949                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
950                                         else 
951                                                 size = mono_type_stack_size (&in->klass->byval_arg, NULL);
952
953                                         stack_size += size;
954                                         arg->opcode = OP_OUTARG_VT;
955                                         arg->klass = in->klass;
956                                         arg->unused = sig->pinvoke;
957                                         arg->inst_imm = size; 
958                                         break;
959                                 }
960                                 case MONO_TYPE_TYPEDBYREF:
961                                         stack_size += sizeof (MonoTypedRef);
962                                         arg->opcode = OP_OUTARG_VT;
963                                         arg->klass = in->klass;
964                                         arg->unused = sig->pinvoke;
965                                         arg->inst_imm = sizeof (MonoTypedRef); 
966                                         break;
967                                 default:
968                                         g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
969                                 }
970                         } else {
971                                 /* the this argument */
972                                 stack_size += 4;
973                         }
974                 }
975         }
976
977         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
978                 if (cinfo->ret.storage == ArgValuetypeInReg) {
979                         MonoInst *zero_inst;
980                         /*
981                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
982                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
983                          * before calling the function. So we add a dummy instruction to represent pushing the 
984                          * struct return address to the stack. The return address will be saved to this stack slot 
985                          * by the code emitted in this_vret_args.
986                          */
987                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
988                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
989                         zero_inst->inst_p0 = 0;
990                         arg->inst_left = zero_inst;
991                         arg->type = STACK_PTR;
992                         /* prepend, so they get reversed */
993                         arg->next = call->out_args;
994                         call->out_args = arg;
995                 }
996                 else
997                         /* if the function returns a struct, the called method already does a ret $0x4 */
998                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
999                                 stack_size -= 4;
1000         }
1001
1002         call->stack_usage = stack_size;
1003         g_free (cinfo);
1004
1005         /* 
1006          * should set more info in call, such as the stack space
1007          * used by the args that needs to be added back to esp
1008          */
1009
1010         return call;
1011 }
1012
1013 /*
1014  * Allow tracing to work with this interface (with an optional argument)
1015  */
1016 void*
1017 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1018 {
1019         guchar *code = p;
1020
1021         /* if some args are passed in registers, we need to save them here */
1022         x86_push_reg (code, X86_EBP);
1023
1024         if (cfg->compile_aot) {
1025                 x86_push_imm (code, cfg->method);
1026                 x86_mov_reg_imm (code, X86_EAX, func);
1027                 x86_call_reg (code, X86_EAX);
1028         } else {
1029                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1030                 x86_push_imm (code, cfg->method);
1031                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1032                 x86_call_code (code, 0);
1033         }
1034         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1035
1036         return code;
1037 }
1038
1039 enum {
1040         SAVE_NONE,
1041         SAVE_STRUCT,
1042         SAVE_EAX,
1043         SAVE_EAX_EDX,
1044         SAVE_FP
1045 };
1046
1047 void*
1048 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1049 {
1050         guchar *code = p;
1051         int arg_size = 0, save_mode = SAVE_NONE;
1052         MonoMethod *method = cfg->method;
1053         
1054         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1055         case MONO_TYPE_VOID:
1056                 /* special case string .ctor icall */
1057                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1058                         save_mode = SAVE_EAX;
1059                 else
1060                         save_mode = SAVE_NONE;
1061                 break;
1062         case MONO_TYPE_I8:
1063         case MONO_TYPE_U8:
1064                 save_mode = SAVE_EAX_EDX;
1065                 break;
1066         case MONO_TYPE_R4:
1067         case MONO_TYPE_R8:
1068                 save_mode = SAVE_FP;
1069                 break;
1070         case MONO_TYPE_VALUETYPE:
1071                 save_mode = SAVE_STRUCT;
1072                 break;
1073         default:
1074                 save_mode = SAVE_EAX;
1075                 break;
1076         }
1077
1078         switch (save_mode) {
1079         case SAVE_EAX_EDX:
1080                 x86_push_reg (code, X86_EDX);
1081                 x86_push_reg (code, X86_EAX);
1082                 if (enable_arguments) {
1083                         x86_push_reg (code, X86_EDX);
1084                         x86_push_reg (code, X86_EAX);
1085                         arg_size = 8;
1086                 }
1087                 break;
1088         case SAVE_EAX:
1089                 x86_push_reg (code, X86_EAX);
1090                 if (enable_arguments) {
1091                         x86_push_reg (code, X86_EAX);
1092                         arg_size = 4;
1093                 }
1094                 break;
1095         case SAVE_FP:
1096                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1097                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1098                 if (enable_arguments) {
1099                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1100                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1101                         arg_size = 8;
1102                 }
1103                 break;
1104         case SAVE_STRUCT:
1105                 if (enable_arguments) {
1106                         x86_push_membase (code, X86_EBP, 8);
1107                         arg_size = 4;
1108                 }
1109                 break;
1110         case SAVE_NONE:
1111         default:
1112                 break;
1113         }
1114
1115         if (cfg->compile_aot) {
1116                 x86_push_imm (code, method);
1117                 x86_mov_reg_imm (code, X86_EAX, func);
1118                 x86_call_reg (code, X86_EAX);
1119         } else {
1120                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1121                 x86_push_imm (code, method);
1122                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1123                 x86_call_code (code, 0);
1124         }
1125         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1126
1127         switch (save_mode) {
1128         case SAVE_EAX_EDX:
1129                 x86_pop_reg (code, X86_EAX);
1130                 x86_pop_reg (code, X86_EDX);
1131                 break;
1132         case SAVE_EAX:
1133                 x86_pop_reg (code, X86_EAX);
1134                 break;
1135         case SAVE_FP:
1136                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1137                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1138                 break;
1139         case SAVE_NONE:
1140         default:
1141                 break;
1142         }
1143
1144         return code;
1145 }
1146
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch for INS using condition COND / signedness SIGN.
 * Relies on `cfg', `code' and `cpos' being available in the caller's scope.
 * The target is a label (ins->inst_i0) when MONO_INST_BRLABEL is set and the
 * basic block ins->inst_true_bb otherwise. If the native offset of the target
 * is already set (non-zero), the branch is emitted directly; otherwise a patch
 * is recorded and a placeholder branch is emitted, using the 8 bit form when
 * MONO_OPT_BRANCH is enabled and the estimated distance fits in an imm8.
 * INS is parenthesized so that any pointer expression can be passed.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if ((ins)->flags & MONO_INST_BRLABEL) { \
        if ((ins)->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + (ins)->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, (ins)->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 ((ins)->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if ((ins)->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + (ins)->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, (ins)->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 ((ins)->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1171
/*
 * Emit a 32 bit conditional branch which is taken when COND holds and whose
 * target is resolved later through a MONO_PATCH_INFO_EXC patch for EXC_NAME.
 * Relies on `cfg' and `code' being available in the caller's scope.
 * The expansion ends with a semicolon of its own, so it is a complete statement.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,is_signed,exc_name) \
        do { \
                mono_add_patch_info (cfg, code - cfg->native_code, \
                                     MONO_PATCH_INFO_EXC, exc_name); \
                x86_branch32 (code, cond, 0, is_signed); \
        } while (0);
1179
/*
 * Emit the fcompp / fnstsw instruction pair into CODE.
 * The expansion ends with a semicolon of its own.
 */
#define EMIT_FPCOMPARE(code) \
        do { \
                x86_fcompp (code); \
                x86_fnstsw (code); \
        } while (0);
1184
1185
1186 static guint8*
1187 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1188 {
1189         if (cfg->compile_aot) {
1190                 guint32 got_reg = X86_EAX;
1191
1192                 if (cfg->compile_aot) {          
1193                         /*
1194                          * Since the patches are generated by the back end, there is
1195                          * no way to generate a got_var at this point.
1196                          */
1197                         g_assert (cfg->got_var);
1198
1199                         if (cfg->got_var->opcode == OP_REGOFFSET)
1200                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1201                         else
1202                                 got_reg = cfg->got_var->dreg;
1203                 }
1204
1205                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1206                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1207         }
1208         else {
1209                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1210                 x86_call_code (code, 0);
1211         }
1212
1213         return code;
1214 }
1215
/*
 * True when INS is one of the opcodes listed below; the peephole pass only
 * turns a zero constant load into XOR when the following instruction is one
 * of them.
 * FIXME: Add more instructions
 */
#define INST_IGNORES_CFLAGS(ins) \
        (((ins)->opcode == CEE_BR) || \
         ((ins)->opcode == OP_STORE_MEMBASE_IMM) || \
         ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1218
1219 static void
1220 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1221 {
1222         MonoInst *ins, *last_ins = NULL;
1223         ins = bb->code;
1224
1225         while (ins) {
1226
1227                 switch (ins->opcode) {
1228                 case OP_ICONST:
1229                         /* reg = 0 -> XOR (reg, reg) */
1230                         /* XOR sets cflags on x86, so we cant do it always */
1231                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1232                                 ins->opcode = CEE_XOR;
1233                                 ins->sreg1 = ins->dreg;
1234                                 ins->sreg2 = ins->dreg;
1235                         }
1236                         break;
1237                 case OP_MUL_IMM: 
1238                         /* remove unnecessary multiplication with 1 */
1239                         if (ins->inst_imm == 1) {
1240                                 if (ins->dreg != ins->sreg1) {
1241                                         ins->opcode = OP_MOVE;
1242                                 } else {
1243                                         last_ins->next = ins->next;
1244                                         ins = ins->next;
1245                                         continue;
1246                                 }
1247                         }
1248                         break;
1249                 case OP_COMPARE_IMM:
1250                         /* OP_COMPARE_IMM (reg, 0) 
1251                          * --> 
1252                          * OP_X86_TEST_NULL (reg) 
1253                          */
1254                         if (!ins->inst_imm)
1255                                 ins->opcode = OP_X86_TEST_NULL;
1256                         break;
1257                 case OP_X86_COMPARE_MEMBASE_IMM:
1258                         /* 
1259                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1260                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1261                          * -->
1262                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1263                          * OP_COMPARE_IMM reg, imm
1264                          *
1265                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1266                          */
1267                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1268                             ins->inst_basereg == last_ins->inst_destbasereg &&
1269                             ins->inst_offset == last_ins->inst_offset) {
1270                                         ins->opcode = OP_COMPARE_IMM;
1271                                         ins->sreg1 = last_ins->sreg1;
1272
1273                                         /* check if we can remove cmp reg,0 with test null */
1274                                         if (!ins->inst_imm)
1275                                                 ins->opcode = OP_X86_TEST_NULL;
1276                                 }
1277
1278                         break;
1279                 case OP_LOAD_MEMBASE:
1280                 case OP_LOADI4_MEMBASE:
1281                         /* 
1282                          * Note: if reg1 = reg2 the load op is removed
1283                          *
1284                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1285                          * OP_LOAD_MEMBASE offset(basereg), reg2
1286                          * -->
1287                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1288                          * OP_MOVE reg1, reg2
1289                          */
1290                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1291                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1292                             ins->inst_basereg == last_ins->inst_destbasereg &&
1293                             ins->inst_offset == last_ins->inst_offset) {
1294                                 if (ins->dreg == last_ins->sreg1) {
1295                                         last_ins->next = ins->next;                             
1296                                         ins = ins->next;                                
1297                                         continue;
1298                                 } else {
1299                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1300                                         ins->opcode = OP_MOVE;
1301                                         ins->sreg1 = last_ins->sreg1;
1302                                 }
1303
1304                         /* 
1305                          * Note: reg1 must be different from the basereg in the second load
1306                          * Note: if reg1 = reg2 is equal then second load is removed
1307                          *
1308                          * OP_LOAD_MEMBASE offset(basereg), reg1
1309                          * OP_LOAD_MEMBASE offset(basereg), reg2
1310                          * -->
1311                          * OP_LOAD_MEMBASE offset(basereg), reg1
1312                          * OP_MOVE reg1, reg2
1313                          */
1314                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1315                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1316                               ins->inst_basereg != last_ins->dreg &&
1317                               ins->inst_basereg == last_ins->inst_basereg &&
1318                               ins->inst_offset == last_ins->inst_offset) {
1319
1320                                 if (ins->dreg == last_ins->dreg) {
1321                                         last_ins->next = ins->next;                             
1322                                         ins = ins->next;                                
1323                                         continue;
1324                                 } else {
1325                                         ins->opcode = OP_MOVE;
1326                                         ins->sreg1 = last_ins->dreg;
1327                                 }
1328
1329                                 //g_assert_not_reached ();
1330
1331 #if 0
1332                         /* 
1333                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1334                          * OP_LOAD_MEMBASE offset(basereg), reg
1335                          * -->
1336                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1337                          * OP_ICONST reg, imm
1338                          */
1339                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1340                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1341                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1342                                    ins->inst_offset == last_ins->inst_offset) {
1343                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1344                                 ins->opcode = OP_ICONST;
1345                                 ins->inst_c0 = last_ins->inst_imm;
1346                                 g_assert_not_reached (); // check this rule
1347 #endif
1348                         }
1349                         break;
1350                 case OP_LOADU1_MEMBASE:
1351                 case OP_LOADI1_MEMBASE:
1352                         /* 
1353                          * Note: if reg1 = reg2 the load op is removed
1354                          *
1355                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1356                          * OP_LOAD_MEMBASE offset(basereg), reg2
1357                          * -->
1358                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1359                          * OP_MOVE reg1, reg2
1360                          */
1361                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1362                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1363                                         ins->inst_offset == last_ins->inst_offset) {
1364                                 if (ins->dreg == last_ins->sreg1) {
1365                                         last_ins->next = ins->next;                             
1366                                         ins = ins->next;                                
1367                                         continue;
1368                                 } else {
1369                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1370                                         ins->opcode = OP_MOVE;
1371                                         ins->sreg1 = last_ins->sreg1;
1372                                 }
1373                         }
1374                         break;
1375                 case OP_LOADU2_MEMBASE:
1376                 case OP_LOADI2_MEMBASE:
1377                         /* 
1378                          * Note: if reg1 = reg2 the load op is removed
1379                          *
1380                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1381                          * OP_LOAD_MEMBASE offset(basereg), reg2
1382                          * -->
1383                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1384                          * OP_MOVE reg1, reg2
1385                          */
1386                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1387                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1388                                         ins->inst_offset == last_ins->inst_offset) {
1389                                 if (ins->dreg == last_ins->sreg1) {
1390                                         last_ins->next = ins->next;                             
1391                                         ins = ins->next;                                
1392                                         continue;
1393                                 } else {
1394                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1395                                         ins->opcode = OP_MOVE;
1396                                         ins->sreg1 = last_ins->sreg1;
1397                                 }
1398                         }
1399                         break;
1400                 case CEE_CONV_I4:
1401                 case CEE_CONV_U4:
1402                 case OP_MOVE:
1403                         /*
1404                          * Removes:
1405                          *
1406                          * OP_MOVE reg, reg 
1407                          */
1408                         if (ins->dreg == ins->sreg1) {
1409                                 if (last_ins)
1410                                         last_ins->next = ins->next;                             
1411                                 ins = ins->next;
1412                                 continue;
1413                         }
1414                         /* 
1415                          * Removes:
1416                          *
1417                          * OP_MOVE sreg, dreg 
1418                          * OP_MOVE dreg, sreg
1419                          */
1420                         if (last_ins && last_ins->opcode == OP_MOVE &&
1421                             ins->sreg1 == last_ins->dreg &&
1422                             ins->dreg == last_ins->sreg1) {
1423                                 last_ins->next = ins->next;                             
1424                                 ins = ins->next;                                
1425                                 continue;
1426                         }
1427                         break;
1428                         
1429                 case OP_X86_PUSH_MEMBASE:
1430                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1431                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1432                             ins->inst_basereg == last_ins->inst_destbasereg &&
1433                             ins->inst_offset == last_ins->inst_offset) {
1434                                     ins->opcode = OP_X86_PUSH;
1435                                     ins->sreg1 = last_ins->sreg1;
1436                         }
1437                         break;
1438                 }
1439                 last_ins = ins;
1440                 ins = ins->next;
1441         }
1442         bb->last_ins = last_ins;
1443 }
1444
1445 static const int 
1446 branch_cc_table [] = {
1447         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1448         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1449         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1450 };
1451
/*
 * Execute A only when the verbose level of the current compile is above 1.
 * Relies on a `cfg' variable being available in the caller's scope.
 * The expansion already carries its own `else', so an `else' written after
 * the macro binds to the caller's `if' and not to the one hidden in here.
 */
#define DEBUG(a) if (!(cfg->verbose_level > 1)) {} else a
//#define DEBUG(a)
1454
1455 /*
1456  * returns the offset used by spillvar. It allocates a new
1457  * spill variable if necessary. 
1458  */
1459 static int
1460 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1461 {
1462         MonoSpillInfo **si, *info;
1463         int i = 0;
1464
1465         si = &cfg->spill_info; 
1466         
1467         while (i <= spillvar) {
1468
1469                 if (!*si) {
1470                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1471                         info->next = NULL;
1472                         cfg->stack_offset -= sizeof (gpointer);
1473                         info->offset = cfg->stack_offset;
1474                 }
1475
1476                 if (i == spillvar)
1477                         return (*si)->offset;
1478
1479                 i++;
1480                 si = &(*si)->next;
1481         }
1482
1483         g_assert_not_reached ();
1484         return 0;
1485 }
1486
1487 /*
1488  * returns the offset used by spillvar. It allocates a new
1489  * spill float variable if necessary. 
1490  * (same as mono_spillvar_offset but for float)
1491  */
1492 static int
1493 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1494 {
1495         MonoSpillInfo **si, *info;
1496         int i = 0;
1497
1498         si = &cfg->spill_info_float; 
1499         
1500         while (i <= spillvar) {
1501
1502                 if (!*si) {
1503                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1504                         info->next = NULL;
1505                         cfg->stack_offset -= sizeof (double);
1506                         info->offset = cfg->stack_offset;
1507                 }
1508
1509                 if (i == spillvar)
1510                         return (*si)->offset;
1511
1512                 i++;
1513                 si = &(*si)->next;
1514         }
1515
1516         g_assert_not_reached ();
1517         return 0;
1518 }
1519
1520 /*
1521  * Creates a store for spilled floating point items
1522  */
1523 static MonoInst*
1524 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1525 {
1526         MonoInst *store;
1527         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1528         store->sreg1 = reg;
1529         store->inst_destbasereg = X86_EBP;
1530         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1531
1532         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08x(%%sp)) (from %d)\n", spill, store->inst_offset, reg));
1533         return store;
1534 }
1535
1536 /*
1537  * Creates a load for spilled floating point items 
1538  */
1539 static MonoInst*
1540 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1541 {
1542         MonoInst *load;
1543         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1544         load->dreg = reg;
1545         load->inst_basereg = X86_EBP;
1546         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1547
1548         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08x(%%sp)) (from %d)\n", spill, load->inst_offset, reg));
1549         return load;
1550 }
1551
/*
 * Both tests only succeed for hard integer registers (0 <= r < MONO_MAX_IREGS):
 * is_global_ireg is true for those outside the X86_IS_CALLEE set,
 * reg_is_freeable for those inside it.
 */
#define is_global_ireg(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && !X86_IS_CALLEE ((r)))
#define reg_is_freeable(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && X86_IS_CALLEE ((r)))
1554
/*
 * Per-register liveness record filled in by the forward pass of the local
 * register allocator. The *_in / *_use fields hold instruction numbers
 * (the pass counts instructions starting at 1).
 */
typedef struct {
        int born_in;            /* first instruction defining the register; 0 = never defined */
        int killed_in;          /* last instruction writing it as a non-base destination */
        int last_use;           /* most recent instruction referencing it */
        int prev_use;           /* value last_use had before its latest update */
        int flags;              /* used to track fp spill/load */
} RegTrack;
1562
/*
 * Instruction descriptors, indexed by opcode. Each descriptor is a string
 * indexed with MONO_INST_DEST / MONO_INST_SRC1 / MONO_INST_SRC2 /
 * MONO_INST_CLOB; a zero entry means the operand is not used.
 */
static const char*const * ins_spec = pentium_desc;
1564
1565 static void
1566 print_ins (int i, MonoInst *ins)
1567 {
1568         const char *spec = ins_spec [ins->opcode];
1569         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1570         if (spec [MONO_INST_DEST]) {
1571                 if (ins->dreg >= MONO_MAX_IREGS)
1572                         g_print (" R%d <-", ins->dreg);
1573                 else
1574                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1575         }
1576         if (spec [MONO_INST_SRC1]) {
1577                 if (ins->sreg1 >= MONO_MAX_IREGS)
1578                         g_print (" R%d", ins->sreg1);
1579                 else
1580                         g_print (" %s", mono_arch_regname (ins->sreg1));
1581         }
1582         if (spec [MONO_INST_SRC2]) {
1583                 if (ins->sreg2 >= MONO_MAX_IREGS)
1584                         g_print (" R%d", ins->sreg2);
1585                 else
1586                         g_print (" %s", mono_arch_regname (ins->sreg2));
1587         }
1588         if (spec [MONO_INST_CLOB])
1589                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1590         g_print ("\n");
1591 }
1592
1593 static void
1594 print_regtrack (RegTrack *t, int num)
1595 {
1596         int i;
1597         char buf [32];
1598         const char *r;
1599         
1600         for (i = 0; i < num; ++i) {
1601                 if (!t [i].born_in)
1602                         continue;
1603                 if (i >= MONO_MAX_IREGS) {
1604                         g_snprintf (buf, sizeof(buf), "R%d", i);
1605                         r = buf;
1606                 } else
1607                         r = mono_arch_regname (i);
1608                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1609         }
1610 }
1611
/* Doubly linked list node wrapping a MonoInst (see inst_list_prepend). */
typedef struct InstList InstList;

struct InstList {
        InstList *prev;         /* node towards the head of the list, NULL at the head */
        InstList *next;         /* node towards the tail of the list */
        MonoInst *data;         /* the wrapped instruction */
};
1619
1620 static inline InstList*
1621 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1622 {
1623         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1624         item->data = data;
1625         item->prev = NULL;
1626         item->next = list;
1627         if (list)
1628                 list->prev = item;
1629         return item;
1630 }
1631
1632 /*
1633  * Force the spilling of the variable in the symbolic register 'reg'.
1634  */
1635 static int
1636 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1637 {
1638         MonoInst *load;
1639         int i, sel, spill;
1640         
1641         sel = cfg->rs->iassign [reg];
1642         /*i = cfg->rs->isymbolic [sel];
1643         g_assert (i == reg);*/
1644         i = reg;
1645         spill = ++cfg->spill_count;
1646         cfg->rs->iassign [i] = -spill - 1;
1647         mono_regstate_free_int (cfg->rs, sel);
1648         /* we need to create a spill var and insert a load to sel after the current instruction */
1649         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1650         load->dreg = sel;
1651         load->inst_basereg = X86_EBP;
1652         load->inst_offset = mono_spillvar_offset (cfg, spill);
1653         if (item->prev) {
1654                 while (ins->next != item->prev->data)
1655                         ins = ins->next;
1656         }
1657         load->next = ins->next;
1658         ins->next = load;
1659         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1660         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1661         g_assert (i == sel);
1662
1663         return sel;
1664 }
1665
1666 static int
1667 get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
1668 {
1669         MonoInst *load;
1670         int i, sel, spill;
1671
1672         DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
1673         /* exclude the registers in the current instruction */
1674         if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
1675                 if (ins->sreg1 >= MONO_MAX_IREGS)
1676                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
1677                 else
1678                         regmask &= ~ (1 << ins->sreg1);
1679                 DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
1680         }
1681         if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
1682                 if (ins->sreg2 >= MONO_MAX_IREGS)
1683                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
1684                 else
1685                         regmask &= ~ (1 << ins->sreg2);
1686                 DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
1687         }
1688         if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
1689                 regmask &= ~ (1 << ins->dreg);
1690                 DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
1691         }
1692
1693         DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
1694         g_assert (regmask); /* need at least a register we can free */
1695         sel = -1;
1696         /* we should track prev_use and spill the register that's farther */
1697         for (i = 0; i < MONO_MAX_IREGS; ++i) {
1698                 if (regmask & (1 << i)) {
1699                         sel = i;
1700                         DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
1701                         break;
1702                 }
1703         }
1704         i = cfg->rs->isymbolic [sel];
1705         spill = ++cfg->spill_count;
1706         cfg->rs->iassign [i] = -spill - 1;
1707         mono_regstate_free_int (cfg->rs, sel);
1708         /* we need to create a spill var and insert a load to sel after the current instruction */
1709         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1710         load->dreg = sel;
1711         load->inst_basereg = X86_EBP;
1712         load->inst_offset = mono_spillvar_offset (cfg, spill);
1713         if (item->prev) {
1714                 while (ins->next != item->prev->data)
1715                         ins = ins->next;
1716         }
1717         load->next = ins->next;
1718         ins->next = load;
1719         DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1720         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1721         g_assert (i == sel);
1722         
1723         return sel;
1724 }
1725
1726 static MonoInst*
1727 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1728 {
1729         MonoInst *copy;
1730         MONO_INST_NEW (cfg, copy, OP_MOVE);
1731         copy->dreg = dest;
1732         copy->sreg1 = src;
1733         if (ins) {
1734                 copy->next = ins->next;
1735                 ins->next = copy;
1736         }
1737         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1738         return copy;
1739 }
1740
1741 static MonoInst*
1742 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1743 {
1744         MonoInst *store;
1745         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1746         store->sreg1 = reg;
1747         store->inst_destbasereg = X86_EBP;
1748         store->inst_offset = mono_spillvar_offset (cfg, spill);
1749         if (ins) {
1750                 store->next = ins->next;
1751                 ins->next = store;
1752         }
1753         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1754         return store;
1755 }
1756
1757 static void
1758 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1759 {
1760         MonoInst *prev;
1761         if (item->next) {
1762                 prev = item->next->data;
1763
1764                 while (prev->next != ins)
1765                         prev = prev->next;
1766                 to_insert->next = ins;
1767                 prev->next = to_insert;
1768         } else {
1769                 to_insert->next = ins;
1770         }
1771         /* 
1772          * needed otherwise in the next instruction we can add an ins to the 
1773          * end and that would get past this instruction.
1774          */
1775         item->data = to_insert; 
1776 }
1777
1778
/*
 * alloc_int_reg (compiled out by the surrounding #if 0):
 *
 *   Return the hard register assigned to SYM_REG, allocating one from
 * ALLOW_MASK (spilling another variable if none is free) when SYM_REG has
 * no assignment yet. An assignment below -1 encodes a spill slot as
 * -spill - 1; in that case a store to the slot is added after INS.
 */
#if  0
static int
alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
{
        int val = cfg->rs->iassign [sym_reg];
        if (val < 0) {
                int spill = 0;
                if (val < -1) {
                        /* the register gets spilled after this inst */
                        spill = -val -1;
                }
                val = mono_regstate_alloc_int (cfg->rs, allow_mask);
                if (val < 0)
                        val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
                cfg->rs->iassign [sym_reg] = val;
                /* add option to store before the instruction for src registers */
                if (spill)
                        create_spilled_store (cfg, spill, val, sym_reg, ins);
        }
        /* record the reverse mapping hard register -> symbolic register */
        cfg->rs->isymbolic [val] = sym_reg;
        return val;
}
#endif
1802
/*
 * flags used in reginfo->flags
 *
 * The MONO_X86_FP_* bits are set by the forward pass of the local register
 * allocator on floating point registers; the MONO_X86_REG_* bits are
 * allocation hints consumed by mono_x86_alloc_int_reg ().
 */
enum {
        MONO_X86_FP_NEEDS_LOAD_SPILL    = 1 << 0,       /* fp src2 taken from a spill while the fp register count is at its limit */
        MONO_X86_FP_NEEDS_SPILL                 = 1 << 1,       /* fp dest produced while the fp register count is at its limit */
        MONO_X86_FP_NEEDS_LOAD                  = 1 << 2,       /* fp source has to be loaded back from a spill */
        MONO_X86_REG_NOT_ECX                    = 1 << 3,       /* avoid ECX (set on sreg1 of 's' clobber instructions) */
        MONO_X86_REG_EAX                                = 1 << 4,       /* prefer EAX (first register of a long pair) */
        MONO_X86_REG_EDX                                = 1 << 5,       /* prefer EDX (second register of a long pair) */
        MONO_X86_REG_ECX                                = 1 << 6        /* prefer ECX (set on sreg2 of 's' clobber instructions) */
};
1813
1814 static int
1815 mono_x86_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1816 {
1817         int val;
1818         int test_mask = dest_mask;
1819
1820         if (flags & MONO_X86_REG_EAX)
1821                 test_mask &= (1 << X86_EAX);
1822         else if (flags & MONO_X86_REG_EDX)
1823                 test_mask &= (1 << X86_EDX);
1824         else if (flags & MONO_X86_REG_ECX)
1825                 test_mask &= (1 << X86_ECX);
1826         else if (flags & MONO_X86_REG_NOT_ECX)
1827                 test_mask &= ~ (1 << X86_ECX);
1828
1829         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1830         if (val >= 0 && test_mask != dest_mask)
1831                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1832
1833         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1834                 DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but exluding ECX\n", test_mask));
1835                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & (~1 << X86_ECX)));
1836         }
1837
1838         if (val < 0) {
1839                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1840                 if (val < 0)
1841                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1842         }
1843
1844         return val;
1845 }
1846
1847 static inline void
1848 assign_ireg (MonoRegState *rs, int reg, int hreg)
1849 {
1850         g_assert (reg >= MONO_MAX_IREGS);
1851         g_assert (hreg < MONO_MAX_IREGS);
1852         g_assert (! is_global_ireg (hreg));
1853
1854         rs->iassign [reg] = hreg;
1855         rs->isymbolic [hreg] = reg;
1856         rs->ifree_mask &= ~ (1 << hreg);
1857 }
1858
1859 /*#include "cprop.c"*/
1860
1861 /*
1862  * Local register allocation.
1863  * We first scan the list of instructions and we save the liveness info of
 * each register (when the register is first used, when its value is set etc.).
1865  * We also reverse the list of instructions (in the InstList list) because assigning
1866  * registers backwards allows for more tricks to be used.
1867  */
1868 void
1869 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1870 {
1871         MonoInst *ins;
1872         MonoRegState *rs = cfg->rs;
1873         int i, val, fpcount;
1874         RegTrack *reginfo, *reginfof;
1875         RegTrack *reginfo1, *reginfo2, *reginfod;
1876         InstList *tmp, *reversed = NULL;
1877         const char *spec;
1878         guint32 src1_mask, src2_mask, dest_mask;
1879         GList *fspill_list = NULL;
1880         int fspill = 0;
1881
1882         if (!bb->code)
1883                 return;
1884         rs->next_vireg = bb->max_ireg;
1885         rs->next_vfreg = bb->max_freg;
1886         mono_regstate_assign (rs);
1887         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1888         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1889         rs->ifree_mask = X86_CALLEE_REGS;
1890
1891         ins = bb->code;
1892
1893         /*if (cfg->opt & MONO_OPT_COPYPROP)
1894                 local_copy_prop (cfg, ins);*/
1895
1896         i = 1;
1897         fpcount = 0;
1898         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1899         /* forward pass on the instructions to collect register liveness info */
1900         while (ins) {
1901                 spec = ins_spec [ins->opcode];
1902                 
1903                 DEBUG (print_ins (i, ins));
1904
1905                 if (spec [MONO_INST_SRC1]) {
1906                         if (spec [MONO_INST_SRC1] == 'f') {
1907                                 GList *spill;
1908                                 reginfo1 = reginfof;
1909
1910                                 spill = g_list_first (fspill_list);
1911                                 if (spill && fpcount < MONO_MAX_FREGS) {
1912                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1913                                         fspill_list = g_list_remove (fspill_list, spill->data);
1914                                 } else
1915                                         fpcount--;
1916                         }
1917                         else
1918                                 reginfo1 = reginfo;
1919                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1920                         reginfo1 [ins->sreg1].last_use = i;
1921                         if (spec [MONO_INST_SRC1] == 'L') {
1922                                 /* The virtual register is allocated sequentially */
1923                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1924                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1925                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1926                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1927
1928                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1929                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1930                         }
1931                 } else {
1932                         ins->sreg1 = -1;
1933                 }
1934                 if (spec [MONO_INST_SRC2]) {
1935                         if (spec [MONO_INST_SRC2] == 'f') {
1936                                 GList *spill;
1937                                 reginfo2 = reginfof;
1938                                 spill = g_list_first (fspill_list);
1939                                 if (spill) {
1940                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1941                                         fspill_list = g_list_remove (fspill_list, spill->data);
1942                                         if (fpcount >= MONO_MAX_FREGS) {
1943                                                 fspill++;
1944                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1945                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1946                                         }
1947                                 } else
1948                                         fpcount--;
1949                         }
1950                         else
1951                                 reginfo2 = reginfo;
1952                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1953                         reginfo2 [ins->sreg2].last_use = i;
1954                         if (spec [MONO_INST_SRC2] == 'L') {
1955                                 /* The virtual register is allocated sequentially */
1956                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1957                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1958                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1959                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1960                         }
1961                         if (spec [MONO_INST_CLOB] == 's') {
1962                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1963                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1964                         }
1965                 } else {
1966                         ins->sreg2 = -1;
1967                 }
1968                 if (spec [MONO_INST_DEST]) {
1969                         if (spec [MONO_INST_DEST] == 'f') {
1970                                 reginfod = reginfof;
1971                                 if (fpcount >= MONO_MAX_FREGS) {
1972                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1973                                         fspill++;
1974                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1975                                         fpcount--;
1976                                 }
1977                                 fpcount++;
1978                         }
1979                         else
1980                                 reginfod = reginfo;
1981                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1982                                 reginfod [ins->dreg].killed_in = i;
1983                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1984                         reginfod [ins->dreg].last_use = i;
1985                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1986                                 reginfod [ins->dreg].born_in = i;
1987                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
1988                                 /* The virtual register is allocated sequentially */
1989                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1990                                 reginfod [ins->dreg + 1].last_use = i;
1991                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1992                                         reginfod [ins->dreg + 1].born_in = i;
1993
1994                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
1995                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
1996                         }
1997                 } else {
1998                         ins->dreg = -1;
1999                 }
2000
2001                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
2002                 ++i;
2003                 ins = ins->next;
2004         }
2005
2006         // todo: check if we have anything left on fp stack, in verify mode?
2007         fspill = 0;
2008
2009         DEBUG (print_regtrack (reginfo, rs->next_vireg));
2010         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
2011         tmp = reversed;
2012         while (tmp) {
2013                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
2014                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
2015                 --i;
2016                 ins = tmp->data;
2017                 spec = ins_spec [ins->opcode];
2018                 prev_dreg = -1;
2019                 clob_dreg = -1;
2020                 DEBUG (g_print ("processing:"));
2021                 DEBUG (print_ins (i, ins));
2022                 if (spec [MONO_INST_CLOB] == 's') {
2023                         /*
2024                          * Shift opcodes, SREG2 must be RCX
2025                          */
2026                         if (rs->ifree_mask & (1 << X86_ECX)) {
2027                                 if (ins->sreg2 < MONO_MAX_IREGS) {
2028                                         /* Argument already in hard reg, need to copy */
2029                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2030                                         insert_before_ins (ins, tmp, copy);
2031                                 }
2032                                 else {
2033                                         DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
2034                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2035                                 }
2036                         } else {
2037                                 int need_ecx_spill = TRUE;
2038                                 /* 
2039                                  * we first check if src1/dreg is already assigned a register
2040                                  * and then we force a spill of the var assigned to ECX.
2041                                  */
2042                                 /* the destination register can't be ECX */
2043                                 dest_mask &= ~ (1 << X86_ECX);
2044                                 src1_mask &= ~ (1 << X86_ECX);
2045                                 val = rs->iassign [ins->dreg];
2046                                 /* 
2047                                  * the destination register is already assigned to ECX:
2048                                  * we need to allocate another register for it and then
2049                                  * copy from this to ECX.
2050                                  */
2051                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
2052                                         int new_dest;
2053                                         new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2054                                         g_assert (new_dest >= 0);
2055                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
2056
2057                                         rs->isymbolic [new_dest] = ins->dreg;
2058                                         rs->iassign [ins->dreg] = new_dest;
2059                                         clob_dreg = ins->dreg;
2060                                         ins->dreg = new_dest;
2061                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
2062                                         need_ecx_spill = FALSE;
2063                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
2064                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
2065                                         rs->iassign [ins->dreg] = val;
2066                                         rs->isymbolic [val] = prev_dreg;
2067                                         ins->dreg = val;*/
2068                                 }
2069                                 if (is_global_ireg (ins->sreg2)) {
2070                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2071                                         insert_before_ins (ins, tmp, copy);
2072                                 }
2073                                 else {
2074                                         val = rs->iassign [ins->sreg2];
2075                                         if (val >= 0 && val != X86_ECX) {
2076                                                 MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
2077                                                 DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
2078                                                 move->next = ins;
2079                                                 g_assert_not_reached ();
2080                                                 /* FIXME: where is move connected to the instruction list? */
2081                                                 //tmp->prev->data->next = move;
2082                                         }
2083                                         else {
2084                                                 if (val == X86_ECX)
2085                                                 need_ecx_spill = FALSE;
2086                                         }
2087                                 }
2088                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
2089                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
2090                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
2091                                         mono_regstate_free_int (rs, X86_ECX);
2092                                 }
2093                                 if (!is_global_ireg (ins->sreg2))
2094                                         /* force-set sreg2 */
2095                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2096                         }
2097                         ins->sreg2 = X86_ECX;
2098                 } else if (spec [MONO_INST_CLOB] == 'd') {
2099                         /*
2100                          * DIVISION/REMAINER
2101                          */
2102                         int dest_reg = X86_EAX;
2103                         int clob_reg = X86_EDX;
2104                         if (spec [MONO_INST_DEST] == 'd') {
2105                                 dest_reg = X86_EDX; /* reminder */
2106                                 clob_reg = X86_EAX;
2107                         }
2108                         if (is_global_ireg (ins->dreg))
2109                                 val = ins->dreg;
2110                         else
2111                                 val = rs->iassign [ins->dreg];
2112                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
2113                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2114                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2115                                 mono_regstate_free_int (rs, dest_reg);
2116                         }
2117                         if (val < 0) {
2118                                 if (val < -1) {
2119                                         /* the register gets spilled after this inst */
2120                                         int spill = -val -1;
2121                                         dest_mask = 1 << dest_reg;
2122                                         prev_dreg = ins->dreg;
2123                                         val = mono_regstate_alloc_int (rs, dest_mask);
2124                                         if (val < 0)
2125                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
2126                                         rs->iassign [ins->dreg] = val;
2127                                         if (spill)
2128                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
2129                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2130                                         rs->isymbolic [val] = prev_dreg;
2131                                         ins->dreg = val;
2132                                 } else {
2133                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
2134                                         prev_dreg = ins->dreg;
2135                                         assign_ireg (rs, ins->dreg, dest_reg);
2136                                         ins->dreg = dest_reg;
2137                                         val = dest_reg;
2138                                 }
2139                         }
2140
2141                         //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
2142                         if (val != dest_reg) { /* force a copy */
2143                                 create_copy_ins (cfg, val, dest_reg, ins);
2144                                 if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
2145                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2146                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2147                                         mono_regstate_free_int (rs, dest_reg);
2148                                 }
2149                         }
2150                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
2151                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2152                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2153                                 mono_regstate_free_int (rs, clob_reg);
2154                         }
2155                         src1_mask = 1 << X86_EAX;
2156                         src2_mask = 1 << X86_ECX;
2157                 } else if (spec [MONO_INST_DEST] == 'l') {
2158                         int hreg;
2159                         val = rs->iassign [ins->dreg];
2160                         /* check special case when dreg have been moved from ecx (clob shift) */
2161                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2162                                 hreg = clob_dreg + 1;
2163                         else
2164                                 hreg = ins->dreg + 1;
2165
2166                         /* base prev_dreg on fixed hreg, handle clob case */
2167                         val = hreg - 1;
2168
2169                         if (val != rs->isymbolic [X86_EAX] && !(rs->ifree_mask & (1 << X86_EAX))) {
2170                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [X86_EAX]));
2171                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2172                                 mono_regstate_free_int (rs, X86_EAX);
2173                         }
2174                         if (hreg != rs->isymbolic [X86_EDX] && !(rs->ifree_mask & (1 << X86_EDX))) {
2175                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [X86_EDX]));
2176                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
2177                                 mono_regstate_free_int (rs, X86_EDX);
2178                         }
2179                 } else if (spec [MONO_INST_CLOB] == 'b') {
2180                         /*
2181                          * x86_set_reg instructions, dreg needs to be EAX..EDX
2182                          */     
2183                         dest_mask = (1 << X86_EAX) | (1 << X86_EBX) | (1 << X86_ECX) | (1 << X86_EDX);
2184                         if ((ins->dreg < MONO_MAX_IREGS) && (! (dest_mask & (1 << ins->dreg)))) {
2185                                 /* 
2186                                  * ins->dreg is already a hard reg, need to allocate another
2187                                  * suitable hard reg and make a copy.
2188                                  */
2189                                 int new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2190                                 g_assert (new_dest >= 0);
2191
2192                                 create_copy_ins (cfg, ins->dreg, new_dest, ins);
2193                                 DEBUG (g_print ("\tclob:b changing dreg R%d to %s\n", ins->dreg, mono_arch_regname (new_dest)));
2194                                 ins->dreg = new_dest;
2195
2196                                 /* The hard reg is no longer needed */
2197                                 mono_regstate_free_int (rs, new_dest);
2198                         }
2199                 }
2200
2201                 /*
2202                  * TRACK DREG
2203                  */
2204                 if (spec [MONO_INST_DEST] == 'f') {
2205                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
2206                                 GList *spill_node;
2207                                 MonoInst *store;
2208                                 spill_node = g_list_first (fspill_list);
2209                                 g_assert (spill_node);
2210
2211                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
2212                                 insert_before_ins (ins, tmp, store);
2213                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
2214                                 fspill--;
2215                         }
2216                 } else if (spec [MONO_INST_DEST] == 'L') {
2217                         int hreg;
2218                         val = rs->iassign [ins->dreg];
2219                         /* check special case when dreg have been moved from ecx (clob shift) */
2220                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2221                                 hreg = clob_dreg + 1;
2222                         else
2223                                 hreg = ins->dreg + 1;
2224
2225                         /* base prev_dreg on fixed hreg, handle clob case */
2226                         prev_dreg = hreg - 1;
2227
2228                         if (val < 0) {
2229                                 int spill = 0;
2230                                 if (val < -1) {
2231                                         /* the register gets spilled after this inst */
2232                                         spill = -val -1;
2233                                 }
2234                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2235                                 rs->iassign [ins->dreg] = val;
2236                                 if (spill)
2237                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2238                         }
2239
2240                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
2241  
2242                         rs->isymbolic [val] = hreg - 1;
2243                         ins->dreg = val;
2244                         
2245                         val = rs->iassign [hreg];
2246                         if (val < 0) {
2247                                 int spill = 0;
2248                                 if (val < -1) {
2249                                         /* the register gets spilled after this inst */
2250                                         spill = -val -1;
2251                                 }
2252                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2253                                 rs->iassign [hreg] = val;
2254                                 if (spill)
2255                                         create_spilled_store (cfg, spill, val, hreg, ins);
2256                         }
2257
2258                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
2259                         rs->isymbolic [val] = hreg;
2260                         /* save reg allocating into unused */
2261                         ins->unused = val;
2262
2263                         /* check if we can free our long reg */
2264                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2265                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
2266                                 mono_regstate_free_int (rs, val);
2267                         }
2268                 }
2269                 else if (ins->dreg >= MONO_MAX_IREGS) {
2270                         int hreg;
2271                         val = rs->iassign [ins->dreg];
2272                         if (spec [MONO_INST_DEST] == 'l') {
2273                                 /* check special case when dreg have been moved from ecx (clob shift) */
2274                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2275                                         hreg = clob_dreg + 1;
2276                                 else
2277                                         hreg = ins->dreg + 1;
2278
2279                                 /* base prev_dreg on fixed hreg, handle clob case */
2280                                 prev_dreg = hreg - 1;
2281                         } else
2282                                 prev_dreg = ins->dreg;
2283
2284                         if (val < 0) {
2285                                 int spill = 0;
2286                                 if (val < -1) {
2287                                         /* the register gets spilled after this inst */
2288                                         spill = -val -1;
2289                                 }
2290                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2291                                 rs->iassign [ins->dreg] = val;
2292                                 if (spill)
2293                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2294                         }
2295                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2296                         rs->isymbolic [val] = prev_dreg;
2297                         ins->dreg = val;
2298                         /* handle cases where lreg needs to be eax:edx */
2299                         if (spec [MONO_INST_DEST] == 'l') {
2300                                 /* check special case when dreg have been moved from ecx (clob shift) */
2301                                 int hreg = prev_dreg + 1;
2302                                 val = rs->iassign [hreg];
2303                                 if (val < 0) {
2304                                         int spill = 0;
2305                                         if (val < -1) {
2306                                                 /* the register gets spilled after this inst */
2307                                                 spill = -val -1;
2308                                         }
2309                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2310                                         rs->iassign [hreg] = val;
2311                                         if (spill)
2312                                                 create_spilled_store (cfg, spill, val, hreg, ins);
2313                                 }
2314                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
2315                                 rs->isymbolic [val] = hreg;
2316                                 if (ins->dreg == X86_EAX) {
2317                                         if (val != X86_EDX)
2318                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2319                                 } else if (ins->dreg == X86_EDX) {
2320                                         if (val == X86_EAX) {
2321                                                 /* swap */
2322                                                 g_assert_not_reached ();
2323                                         } else {
2324                                                 /* two forced copies */
2325                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2326                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2327                                         }
2328                                 } else {
2329                                         if (val == X86_EDX) {
2330                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2331                                         } else {
2332                                                 /* two forced copies */
2333                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2334                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2335                                         }
2336                                 }
2337                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2338                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
2339                                         mono_regstate_free_int (rs, val);
2340                                 }
2341                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
2342                                 /* this instruction only outputs to EAX, need to copy */
2343                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2344                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
2345                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
2346                         }
2347                 }
2348                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
2349                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
2350                         mono_regstate_free_int (rs, ins->dreg);
2351                 }
2352                 /* put src1 in EAX if it needs to be */
2353                 if (spec [MONO_INST_SRC1] == 'a') {
2354                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
2355                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
2356                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2357                                 mono_regstate_free_int (rs, X86_EAX);
2358                         }
2359                         if (ins->sreg1 < MONO_MAX_IREGS) {
2360                                 /* The argument is already in a hard reg, need to copy */
2361                                 MonoInst *copy = create_copy_ins (cfg, X86_EAX, ins->sreg1, NULL);
2362                                 insert_before_ins (ins, tmp, copy);
2363                         }
2364                         else
2365                                 /* force-set sreg1 */
2366                                 assign_ireg (rs, ins->sreg1, X86_EAX);
2367                         ins->sreg1 = X86_EAX;
2368                 }
2369
2370                 /*
2371                  * TRACK SREG1
2372                  */
2373                 if (spec [MONO_INST_SRC1] == 'f') {
2374                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
2375                                 MonoInst *load;
2376                                 MonoInst *store = NULL;
2377
2378                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2379                                         GList *spill_node;
2380                                         spill_node = g_list_first (fspill_list);
2381                                         g_assert (spill_node);
2382
2383                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
2384                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2385                                 }
2386
2387                                 fspill++;
2388                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2389                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
2390                                 insert_before_ins (ins, tmp, load);
2391                                 if (store) 
2392                                         insert_before_ins (load, tmp, store);
2393                         }
2394                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
2395                         /* force source to be same as dest */
2396                         assign_ireg (rs, ins->sreg1, ins->dreg);
2397                         assign_ireg (rs, ins->sreg1 + 1, ins->unused);
2398
2399                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
2400                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
2401
2402                         ins->sreg1 = ins->dreg;
2403                         /* 
2404                          * No need for saving the reg, we know that src1=dest in this cases
2405                          * ins->inst_c0 = ins->unused;
2406                          */
2407                 }
2408                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
2409                         val = rs->iassign [ins->sreg1];
2410                         prev_sreg1 = ins->sreg1;
2411                         if (val < 0) {
2412                                 int spill = 0;
2413                                 if (val < -1) {
2414                                         /* the register gets spilled after this inst */
2415                                         spill = -val -1;
2416                                 }
2417                                 if (0 && ins->opcode == OP_MOVE) {
2418                                         /* 
2419                                          * small optimization: the dest register is already allocated
2420                                          * but the src one is not: we can simply assign the same register
2421                                          * here and peephole will get rid of the instruction later.
2422                                          * This optimization may interfere with the clobbering handling:
2423                                          * it removes a mov operation that will be added again to handle clobbering.
2424                                          * There are also some other issues that should with make testjit.
2425                                          */
2426                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2427                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2428                                         //g_assert (val >= 0);
2429                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2430                                 } else {
2431                                         //g_assert (val == -1); /* source cannot be spilled */
2432                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2433                                         rs->iassign [ins->sreg1] = val;
2434                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2435                                 }
2436                                 if (spill) {
2437                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2438                                         insert_before_ins (ins, tmp, store);
2439                                 }
2440                         }
2441                         rs->isymbolic [val] = prev_sreg1;
2442                         ins->sreg1 = val;
2443                 } else {
2444                         prev_sreg1 = -1;
2445                 }
2446                 /* handle clobbering of sreg1 */
2447                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2448                         MonoInst *sreg2_copy = NULL;
2449                         MonoInst *copy = NULL;
2450
2451                         if (ins->dreg == ins->sreg2) {
2452                                 /* 
2453                                  * copying sreg1 to dreg could clobber sreg2, so allocate a new
2454                                  * register for it.
2455                                  */
2456                                 int reg2 = 0;
2457
2458                                 reg2 = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->sreg2, 0);
2459
2460                                 DEBUG (g_print ("\tneed to copy sreg2 %s to reg %s\n", mono_arch_regname (ins->sreg2), mono_arch_regname (reg2)));
2461                                 sreg2_copy = create_copy_ins (cfg, reg2, ins->sreg2, NULL);
2462                                 prev_sreg2 = ins->sreg2 = reg2;
2463
2464                                 mono_regstate_free_int (rs, reg2);
2465                         }
2466
2467                         copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2468                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2469                         insert_before_ins (ins, tmp, copy);
2470
2471                         if (sreg2_copy)
2472                                 insert_before_ins (copy, tmp, sreg2_copy);
2473
2474                         /*
2475                          * Need to prevent sreg2 to be allocated to sreg1, since that
2476                          * would screw up the previous copy.
2477                          */
2478                         src2_mask &= ~ (1 << ins->sreg1);
2479                         /* we set sreg1 to dest as well */
2480                         prev_sreg1 = ins->sreg1 = ins->dreg;
2481                         src2_mask &= ~ (1 << ins->dreg);
2482                 }
2483
2484                 /*
2485                  * TRACK SREG2
2486                  */
2487                 if (spec [MONO_INST_SRC2] == 'f') {
2488                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2489                                 MonoInst *load;
2490                                 MonoInst *store = NULL;
2491
2492                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2493                                         GList *spill_node;
2494
2495                                         spill_node = g_list_first (fspill_list);
2496                                         g_assert (spill_node);
2497                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2498                                                 spill_node = g_list_next (spill_node);
2499         
2500                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2501                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2502                                 } 
2503                                 
2504                                 fspill++;
2505                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2506                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2507                                 insert_before_ins (ins, tmp, load);
2508                                 if (store) 
2509                                         insert_before_ins (load, tmp, store);
2510                         }
2511                 } 
2512                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2513                         val = rs->iassign [ins->sreg2];
2514                         prev_sreg2 = ins->sreg2;
2515                         if (val < 0) {
2516                                 int spill = 0;
2517                                 if (val < -1) {
2518                                         /* the register gets spilled after this inst */
2519                                         spill = -val -1;
2520                                 }
2521                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2522                                 rs->iassign [ins->sreg2] = val;
2523                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2524                                 if (spill)
2525                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2526                         }
2527                         rs->isymbolic [val] = prev_sreg2;
2528                         ins->sreg2 = val;
2529                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
2530                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
2531                         }
2532                 } else {
2533                         prev_sreg2 = -1;
2534                 }
2535
2536                 if (spec [MONO_INST_CLOB] == 'c') {
2537                         int j, s;
2538                         guint32 clob_mask = X86_CALLEE_REGS;
2539                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2540                                 s = 1 << j;
2541                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2542                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2543                                 }
2544                         }
2545                 }
2546                 if (spec [MONO_INST_CLOB] == 'a') {
2547                         guint32 clob_reg = X86_EAX;
2548                         if (!(rs->ifree_mask & (1 << clob_reg)) && (rs->isymbolic [clob_reg] >= 8)) {
2549                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2550                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2551                                 mono_regstate_free_int (rs, clob_reg);
2552                         }
2553                 }
2554                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2555                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2556                         mono_regstate_free_int (rs, ins->sreg1);
2557                 }
2558                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2559                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2560                         mono_regstate_free_int (rs, ins->sreg2);
2561                 }*/
2562         
2563                 //DEBUG (print_ins (i, ins));
2564                 /* this may result from a insert_before call */
2565                 if (!tmp->next)
2566                         bb->code = tmp->data;
2567                 tmp = tmp->next;
2568         }
2569
2570         g_free (reginfo);
2571         g_free (reginfof);
2572         g_list_free (fspill_list);
2573 }
2574
2575 static unsigned char*
2576 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
2577 {
2578         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2579         x86_fnstcw_membase(code, X86_ESP, 0);
2580         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
2581         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
2582         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
2583         x86_fldcw_membase (code, X86_ESP, 2);
2584         if (size == 8) {
2585                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2586                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2587                 x86_pop_reg (code, dreg);
2588                 /* FIXME: need the high register 
2589                  * x86_pop_reg (code, dreg_high);
2590                  */
2591         } else {
2592                 x86_push_reg (code, X86_EAX); // SP = SP - 4
2593                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
2594                 x86_pop_reg (code, dreg);
2595         }
2596         x86_fldcw_membase (code, X86_ESP, 0);
2597         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2598
2599         if (size == 1)
2600                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
2601         else if (size == 2)
2602                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
2603         return code;
2604 }
2605
2606 static unsigned char*
2607 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2608 {
2609         int sreg = tree->sreg1;
2610 #ifdef PLATFORM_WIN32
2611         guint8* br[5];
2612
2613         /*
2614          * Under Windows:
2615          * If requested stack size is larger than one page,
2616          * perform stack-touch operation
2617          */
2618         /*
2619          * Generate stack probe code.
2620          * Under Windows, it is necessary to allocate one page at a time,
2621          * "touching" stack after each successful sub-allocation. This is
2622          * because of the way stack growth is implemented - there is a
2623          * guard page before the lowest stack page that is currently commited.
2624          * Stack normally grows sequentially so OS traps access to the
2625          * guard page and commits more pages when needed.
2626          */
2627         x86_test_reg_imm (code, sreg, ~0xFFF);
2628         br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2629
2630         br[2] = code; /* loop */
2631         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
2632         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
2633         x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2634         x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2635         br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2636         x86_patch (br[3], br[2]);
2637         x86_test_reg_reg (code, sreg, sreg);
2638         br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2639         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2640
2641         br[1] = code; x86_jump8 (code, 0);
2642
2643         x86_patch (br[0], code);
2644         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2645         x86_patch (br[1], code);
2646         x86_patch (br[4], code);
2647 #else /* PLATFORM_WIN32 */
2648         x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
2649 #endif
2650         if (tree->flags & MONO_INST_INIT) {
2651                 int offset = 0;
2652                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
2653                         x86_push_reg (code, X86_EAX);
2654                         offset += 4;
2655                 }
2656                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
2657                         x86_push_reg (code, X86_ECX);
2658                         offset += 4;
2659                 }
2660                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
2661                         x86_push_reg (code, X86_EDI);
2662                         offset += 4;
2663                 }
2664                 
2665                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
2666                 if (sreg != X86_ECX)
2667                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
2668                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
2669                                 
2670                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
2671                 x86_cld (code);
2672                 x86_prefix (code, X86_REP_PREFIX);
2673                 x86_stosl (code);
2674                 
2675                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
2676                         x86_pop_reg (code, X86_EDI);
2677                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
2678                         x86_pop_reg (code, X86_ECX);
2679                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
2680                         x86_pop_reg (code, X86_EAX);
2681         }
2682         return code;
2683 }
2684
2685
2686 static guint8*
2687 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2688 {
2689         CallInfo *cinfo;
2690         int quad;
2691
2692         /* Move return value to the target register */
2693         switch (ins->opcode) {
2694         case CEE_CALL:
2695         case OP_CALL_REG:
2696         case OP_CALL_MEMBASE:
2697                 if (ins->dreg != X86_EAX)
2698                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2699                 break;
2700         case OP_VCALL:
2701         case OP_VCALL_REG:
2702         case OP_VCALL_MEMBASE:
2703                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
2704                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2705                         /* Pop the destination address from the stack */
2706                         x86_pop_reg (code, X86_ECX);
2707                         
2708                         for (quad = 0; quad < 2; quad ++) {
2709                                 switch (cinfo->ret.pair_storage [quad]) {
2710                                 case ArgInIReg:
2711                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
2712                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
2713                                         break;
2714                                 case ArgNone:
2715                                         break;
2716                                 default:
2717                                         g_assert_not_reached ();
2718                                 }
2719                         }
2720                 }
2721                 g_free (cinfo);
2722         default:
2723                 break;
2724         }
2725
2726         return code;
2727 }
2728
2729 static guint8*
2730 emit_tls_get (guint8* code, int dreg, int tls_offset)
2731 {
2732 #ifdef PLATFORM_WIN32
2733         /* 
2734          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
2735          * Journal and/or a disassembly of the TlsGet () function.
2736          */
2737         g_assert (tls_offset < 64);
2738         x86_prefix (code, X86_FS_PREFIX);
2739         x86_mov_reg_mem (code, dreg, 0x18, 4);
2740         /* Dunno what this does but TlsGetValue () contains it */
2741         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
2742         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
2743 #else
2744         x86_prefix (code, X86_GS_PREFIX);
2745         x86_mov_reg_mem (code, dreg, tls_offset, 4);                    
2746 #endif
2747         return code;
2748 }
2749
/*
 * REAL_PRINT_REG:
 *
 *   Debugging aid. Emits native code which calls printf () with the format
 * TEXT " %d %p\n", passing the register number REG and the run-time contents
 * of that register. EAX, EDX and ECX are pushed before the call and popped
 * after it, and the three pushed arguments are removed from the stack.
 *
 * TEXT has to be a string literal since it is pasted in front of the format
 * suffix, and a variable named 'code' (the emission cursor of the x86_*
 * macros) has to be in scope. The macro expands to several statements ending
 * in a semicolon, so do not use it as the unbraced body of an if/else/loop.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert ((reg) >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);

/*
 * Alignment applied to the start of loop blocks. It is combined with
 * (align - 1) as a bit mask, so it has to be a power of two.
 * TODO: benchmark and set based on cpu
 */
#define LOOP_ALIGNMENT 8
/* Non-zero when BB has both its loop_body_start and nesting fields set */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2768
2769 void
2770 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2771 {
2772         MonoInst *ins;
2773         MonoCallInst *call;
2774         guint offset;
2775         guint8 *code = cfg->native_code + cfg->code_len;
2776         MonoInst *last_ins = NULL;
2777         guint last_offset = 0;
2778         int max_len, cpos;
2779
2780         if (cfg->opt & MONO_OPT_PEEPHOLE)
2781                 peephole_pass (cfg, bb);
2782
2783         if (cfg->opt & MONO_OPT_LOOP) {
2784                 int pad, align = LOOP_ALIGNMENT;
2785                 /* set alignment depending on cpu */
2786                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2787                         pad = align - pad;
2788                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2789                         x86_padding (code, pad);
2790                         cfg->code_len += pad;
2791                         bb->native_offset = cfg->code_len;
2792                 }
2793         }
2794
2795         if (cfg->verbose_level > 2)
2796                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2797
2798         cpos = bb->max_offset;
2799
2800         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2801                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2802                 g_assert (!cfg->compile_aot);
2803                 cpos += 6;
2804
2805                 cov->data [bb->dfn].cil_code = bb->cil_code;
2806                 /* this is not thread safe, but good enough */
2807                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2808         }
2809
2810         offset = code - cfg->native_code;
2811
2812         ins = bb->code;
2813         while (ins) {
2814                 offset = code - cfg->native_code;
2815
2816                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2817
2818                 if (offset > (cfg->code_size - max_len - 16)) {
2819                         cfg->code_size *= 2;
2820                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2821                         code = cfg->native_code + offset;
2822                         mono_jit_stats.code_reallocs++;
2823                 }
2824
2825                 mono_debug_record_line_number (cfg, ins, offset);
2826
2827                 switch (ins->opcode) {
2828                 case OP_BIGMUL:
2829                         x86_mul_reg (code, ins->sreg2, TRUE);
2830                         break;
2831                 case OP_BIGMUL_UN:
2832                         x86_mul_reg (code, ins->sreg2, FALSE);
2833                         break;
2834                 case OP_X86_SETEQ_MEMBASE:
2835                 case OP_X86_SETNE_MEMBASE:
2836                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2837                                          ins->inst_basereg, ins->inst_offset, TRUE);
2838                         break;
2839                 case OP_STOREI1_MEMBASE_IMM:
2840                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2841                         break;
2842                 case OP_STOREI2_MEMBASE_IMM:
2843                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2844                         break;
2845                 case OP_STORE_MEMBASE_IMM:
2846                 case OP_STOREI4_MEMBASE_IMM:
2847                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2848                         break;
2849                 case OP_STOREI1_MEMBASE_REG:
2850                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2851                         break;
2852                 case OP_STOREI2_MEMBASE_REG:
2853                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2854                         break;
2855                 case OP_STORE_MEMBASE_REG:
2856                 case OP_STOREI4_MEMBASE_REG:
2857                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2858                         break;
2859                 case CEE_LDIND_I:
2860                 case CEE_LDIND_I4:
2861                 case CEE_LDIND_U4:
2862                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2863                         break;
2864                 case OP_LOADU4_MEM:
2865                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2866                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2867                         break;
2868                 case OP_LOAD_MEMBASE:
2869                 case OP_LOADI4_MEMBASE:
2870                 case OP_LOADU4_MEMBASE:
2871                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2872                         break;
2873                 case OP_LOADU1_MEMBASE:
2874                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2875                         break;
2876                 case OP_LOADI1_MEMBASE:
2877                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2878                         break;
2879                 case OP_LOADU2_MEMBASE:
2880                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2881                         break;
2882                 case OP_LOADI2_MEMBASE:
2883                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2884                         break;
2885                 case CEE_CONV_I1:
2886                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2887                         break;
2888                 case CEE_CONV_I2:
2889                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2890                         break;
2891                 case CEE_CONV_U1:
2892                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2893                         break;
2894                 case CEE_CONV_U2:
2895                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2896                         break;
2897                 case OP_COMPARE:
2898                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2899                         break;
2900                 case OP_COMPARE_IMM:
2901                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2902                         break;
2903                 case OP_X86_COMPARE_MEMBASE_REG:
2904                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2905                         break;
2906                 case OP_X86_COMPARE_MEMBASE_IMM:
2907                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2908                         break;
2909                 case OP_X86_COMPARE_MEMBASE8_IMM:
2910                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2911                         break;
2912                 case OP_X86_COMPARE_REG_MEMBASE:
2913                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2914                         break;
2915                 case OP_X86_COMPARE_MEM_IMM:
2916                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2917                         break;
2918                 case OP_X86_TEST_NULL:
2919                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2920                         break;
2921                 case OP_X86_ADD_MEMBASE_IMM:
2922                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2923                         break;
2924                 case OP_X86_ADD_MEMBASE:
2925                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2926                         break;
2927                 case OP_X86_SUB_MEMBASE_IMM:
2928                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2929                         break;
2930                 case OP_X86_SUB_MEMBASE:
2931                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2932                         break;
2933                 case OP_X86_INC_MEMBASE:
2934                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2935                         break;
2936                 case OP_X86_INC_REG:
2937                         x86_inc_reg (code, ins->dreg);
2938                         break;
2939                 case OP_X86_DEC_MEMBASE:
2940                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2941                         break;
2942                 case OP_X86_DEC_REG:
2943                         x86_dec_reg (code, ins->dreg);
2944                         break;
2945                 case OP_X86_MUL_MEMBASE:
2946                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2947                         break;
2948                 case CEE_BREAK:
2949                         x86_breakpoint (code);
2950                         break;
2951                 case OP_ADDCC:
2952                 case CEE_ADD:
2953                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2954                         break;
2955                 case OP_ADC:
2956                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2957                         break;
2958                 case OP_ADDCC_IMM:
2959                 case OP_ADD_IMM:
2960                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2961                         break;
2962                 case OP_ADC_IMM:
2963                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2964                         break;
2965                 case OP_SUBCC:
2966                 case CEE_SUB:
2967                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2968                         break;
2969                 case OP_SBB:
2970                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2971                         break;
2972                 case OP_SUBCC_IMM:
2973                 case OP_SUB_IMM:
2974                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2975                         break;
2976                 case OP_SBB_IMM:
2977                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2978                         break;
2979                 case CEE_AND:
2980                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2981                         break;
2982                 case OP_AND_IMM:
2983                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2984                         break;
2985                 case CEE_DIV:
2986                         x86_cdq (code);
2987                         x86_div_reg (code, ins->sreg2, TRUE);
2988                         break;
2989                 case CEE_DIV_UN:
2990                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2991                         x86_div_reg (code, ins->sreg2, FALSE);
2992                         break;
2993                 case OP_DIV_IMM:
2994                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2995                         x86_cdq (code);
2996                         x86_div_reg (code, ins->sreg2, TRUE);
2997                         break;
2998                 case CEE_REM:
2999                         x86_cdq (code);
3000                         x86_div_reg (code, ins->sreg2, TRUE);
3001                         break;
3002                 case CEE_REM_UN:
3003                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
3004                         x86_div_reg (code, ins->sreg2, FALSE);
3005                         break;
3006                 case OP_REM_IMM:
3007                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3008                         x86_cdq (code);
3009                         x86_div_reg (code, ins->sreg2, TRUE);
3010                         break;
3011                 case CEE_OR:
3012                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
3013                         break;
3014                 case OP_OR_IMM:
3015                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
3016                         break;
3017                 case CEE_XOR:
3018                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
3019                         break;
3020                 case OP_XOR_IMM:
3021                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
3022                         break;
3023                 case CEE_SHL:
3024                         g_assert (ins->sreg2 == X86_ECX);
3025                         x86_shift_reg (code, X86_SHL, ins->dreg);
3026                         break;
3027                 case CEE_SHR:
3028                         g_assert (ins->sreg2 == X86_ECX);
3029                         x86_shift_reg (code, X86_SAR, ins->dreg);
3030                         break;
3031                 case OP_SHR_IMM:
3032                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
3033                         break;
3034                 case OP_SHR_UN_IMM:
3035                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
3036                         break;
3037                 case CEE_SHR_UN:
3038                         g_assert (ins->sreg2 == X86_ECX);
3039                         x86_shift_reg (code, X86_SHR, ins->dreg);
3040                         break;
3041                 case OP_SHL_IMM:
3042                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
3043                         break;
3044                 case OP_LSHL: {
3045                         guint8 *jump_to_end;
3046
3047                         /* handle shifts below 32 bits */
3048                         x86_shld_reg (code, ins->unused, ins->sreg1);
3049                         x86_shift_reg (code, X86_SHL, ins->sreg1);
3050
3051                         x86_test_reg_imm (code, X86_ECX, 32);
3052                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3053
3054                         /* handle shift over 32 bit */
3055                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3056                         x86_clear_reg (code, ins->sreg1);
3057                         
3058                         x86_patch (jump_to_end, code);
3059                         }
3060                         break;
3061                 case OP_LSHR: {
3062                         guint8 *jump_to_end;
3063
3064                         /* handle shifts below 32 bits */
3065                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3066                         x86_shift_reg (code, X86_SAR, ins->unused);
3067
3068                         x86_test_reg_imm (code, X86_ECX, 32);
3069                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3070
3071                         /* handle shifts over 31 bits */
3072                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3073                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
3074                         
3075                         x86_patch (jump_to_end, code);
3076                         }
3077                         break;
3078                 case OP_LSHR_UN: {
3079                         guint8 *jump_to_end;
3080
3081                         /* handle shifts below 32 bits */
3082                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3083                         x86_shift_reg (code, X86_SHR, ins->unused);
3084
3085                         x86_test_reg_imm (code, X86_ECX, 32);
3086                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3087
3088                         /* handle shifts over 31 bits */
3089                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3090                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
3091                         
3092                         x86_patch (jump_to_end, code);
3093                         }
3094                         break;
3095                 case OP_LSHL_IMM:
3096                         if (ins->inst_imm >= 32) {
3097                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3098                                 x86_clear_reg (code, ins->sreg1);
3099                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
3100                         } else {
3101                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
3102                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
3103                         }
3104                         break;
3105                 case OP_LSHR_IMM:
3106                         if (ins->inst_imm >= 32) {
3107                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
3108                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
3109                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
3110                         } else {
3111                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3112                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
3113                         }
3114                         break;
3115                 case OP_LSHR_UN_IMM:
3116                         if (ins->inst_imm >= 32) {
3117                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3118                                 x86_clear_reg (code, ins->unused);
3119                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
3120                         } else {
3121                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3122                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
3123                         }
3124                         break;
3125                 case CEE_NOT:
3126                         x86_not_reg (code, ins->sreg1);
3127                         break;
3128                 case CEE_NEG:
3129                         x86_neg_reg (code, ins->sreg1);
3130                         break;
3131                 case OP_SEXT_I1:
3132                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3133                         break;
3134                 case OP_SEXT_I2:
3135                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3136                         break;
3137                 case CEE_MUL:
3138                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3139                         break;
3140                 case OP_MUL_IMM:
3141                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
3142                         break;
3143                 case CEE_MUL_OVF:
3144                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3145                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3146                         break;
3147                 case CEE_MUL_OVF_UN: {
3148                         /* the mul operation and the exception check should most likely be split */
3149                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3150                         /*g_assert (ins->sreg2 == X86_EAX);
3151                         g_assert (ins->dreg == X86_EAX);*/
3152                         if (ins->sreg2 == X86_EAX) {
3153                                 non_eax_reg = ins->sreg1;
3154                         } else if (ins->sreg1 == X86_EAX) {
3155                                 non_eax_reg = ins->sreg2;
3156                         } else {
3157                                 /* no need to save since we're going to store to it anyway */
3158                                 if (ins->dreg != X86_EAX) {
3159                                         saved_eax = TRUE;
3160                                         x86_push_reg (code, X86_EAX);
3161                                 }
3162                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
3163                                 non_eax_reg = ins->sreg2;
3164                         }
3165                         if (ins->dreg == X86_EDX) {
3166                                 if (!saved_eax) {
3167                                         saved_eax = TRUE;
3168                                         x86_push_reg (code, X86_EAX);
3169                                 }
3170                         } else if (ins->dreg != X86_EAX) {
3171                                 saved_edx = TRUE;
3172                                 x86_push_reg (code, X86_EDX);
3173                         }
3174                         x86_mul_reg (code, non_eax_reg, FALSE);
3175                         /* save before the check since pop and mov don't change the flags */
3176                         if (ins->dreg != X86_EAX)
3177                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3178                         if (saved_edx)
3179                                 x86_pop_reg (code, X86_EDX);
3180                         if (saved_eax)
3181                                 x86_pop_reg (code, X86_EAX);
3182                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3183                         break;
3184                 }
3185                 case OP_ICONST:
3186                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
3187                         break;
3188                 case OP_AOTCONST:
3189                         g_assert_not_reached ();
3190                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
3191                         x86_mov_reg_imm (code, ins->dreg, 0);
3192                         break;
3193                 case OP_LOAD_GOTADDR:
3194                         x86_call_imm (code, 0);
3195                         /* 
3196                          * The patch needs to point to the pop, since the GOT offset needs 
3197                          * to be added to that address.
3198                          */
3199                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3200                         x86_pop_reg (code, ins->dreg);
3201                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
3202                         break;
3203                 case OP_GOT_ENTRY:
3204                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3205                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
3206                         break;
3207                 case OP_X86_PUSH_GOT_ENTRY:
3208                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3209                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
3210                         break;
3211                 case CEE_CONV_I4:
3212                 case OP_MOVE:
3213                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3214                         break;
3215                 case CEE_CONV_U4:
3216                         g_assert_not_reached ();
3217                 case CEE_JMP: {
3218                         /*
3219                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3220                          * Keep in sync with the code in emit_epilog.
3221                          */
3222                         int pos = 0;
3223
3224                         /* FIXME: no tracing support... */
3225                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3226                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3227                         /* reset offset to make max_len work */
3228                         offset = code - cfg->native_code;
3229
3230                         g_assert (!cfg->method->save_lmf);
3231
3232                         if (cfg->used_int_regs & (1 << X86_EBX))
3233                                 pos -= 4;
3234                         if (cfg->used_int_regs & (1 << X86_EDI))
3235                                 pos -= 4;
3236                         if (cfg->used_int_regs & (1 << X86_ESI))
3237                                 pos -= 4;
3238                         if (pos)
3239                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3240         
3241                         if (cfg->used_int_regs & (1 << X86_ESI))
3242                                 x86_pop_reg (code, X86_ESI);
3243                         if (cfg->used_int_regs & (1 << X86_EDI))
3244                                 x86_pop_reg (code, X86_EDI);
3245                         if (cfg->used_int_regs & (1 << X86_EBX))
3246                                 x86_pop_reg (code, X86_EBX);
3247         
3248                         /* restore ESP/EBP */
3249                         x86_leave (code);
3250                         offset = code - cfg->native_code;
3251                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3252                         x86_jump32 (code, 0);
3253                         break;
3254                 }
3255                 case OP_CHECK_THIS:
3256                         /* ensure ins->sreg1 is not NULL
3257                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
3258                          * cmp DWORD PTR [eax], 0
3259                          */
3260                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
3261                         break;
3262                 case OP_ARGLIST: {
3263                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
3264                         x86_push_reg (code, hreg);
3265                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
3266                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
3267                         x86_pop_reg (code, hreg);
3268                         break;
3269                 }
3270                 case OP_FCALL:
3271                 case OP_LCALL:
3272                 case OP_VCALL:
3273                 case OP_VOIDCALL:
3274                 case CEE_CALL:
3275                         call = (MonoCallInst*)ins;
3276                         if (ins->flags & MONO_INST_HAS_METHOD)
3277                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
3278                         else
3279                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
3280                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3281                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
3282                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
3283                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
3284                                  * smart enough to do that optimization yet
3285                                  *
3286                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
3287                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
3288                                  * speedup (most likely from locality benefits). People with other processors should
3289                                  * check on theirs to see what happens.
3290                                  */
3291                                 if (call->stack_usage == 4) {
3292                                         /* we want to use registers that won't get used soon, so use
3293                                          * ecx, as eax will get allocated first. edx is used by long calls,
3294                                          * so we can't use that.
3295                                          */
3296                                         
3297                                         x86_pop_reg (code, X86_ECX);
3298                                 } else {
3299                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3300                                 }
3301                         }
3302                         code = emit_move_return_value (cfg, ins, code);
3303                         break;
3304                 case OP_FCALL_REG:
3305                 case OP_LCALL_REG:
3306                 case OP_VCALL_REG:
3307                 case OP_VOIDCALL_REG:
3308                 case OP_CALL_REG:
3309                         call = (MonoCallInst*)ins;
3310                         x86_call_reg (code, ins->sreg1);
3311                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3312                                 if (call->stack_usage == 4)
3313                                         x86_pop_reg (code, X86_ECX);
3314                                 else
3315                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3316                         }
3317                         code = emit_move_return_value (cfg, ins, code);
3318                         break;
3319                 case OP_FCALL_MEMBASE:
3320                 case OP_LCALL_MEMBASE:
3321                 case OP_VCALL_MEMBASE:
3322                 case OP_VOIDCALL_MEMBASE:
3323                 case OP_CALL_MEMBASE:
3324                         call = (MonoCallInst*)ins;
3325                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
3326                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3327                                 if (call->stack_usage == 4)
3328                                         x86_pop_reg (code, X86_ECX);
3329                                 else
3330                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3331                         }
3332                         code = emit_move_return_value (cfg, ins, code);
3333                         break;
3334                 case OP_OUTARG:
3335                 case OP_X86_PUSH:
3336                         x86_push_reg (code, ins->sreg1);
3337                         break;
3338                 case OP_X86_PUSH_IMM:
3339                         x86_push_imm (code, ins->inst_imm);
3340                         break;
3341                 case OP_X86_PUSH_MEMBASE:
3342                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
3343                         break;
3344                 case OP_X86_PUSH_OBJ: 
3345                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
3346                         x86_push_reg (code, X86_EDI);
3347                         x86_push_reg (code, X86_ESI);
3348                         x86_push_reg (code, X86_ECX);
3349                         if (ins->inst_offset)
3350                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
3351                         else
3352                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
3353                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
3354                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
3355                         x86_cld (code);
3356                         x86_prefix (code, X86_REP_PREFIX);
3357                         x86_movsd (code);
3358                         x86_pop_reg (code, X86_ECX);
3359                         x86_pop_reg (code, X86_ESI);
3360                         x86_pop_reg (code, X86_EDI);
3361                         break;
3362                 case OP_X86_LEA:
3363                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
3364                         break;
3365                 case OP_X86_LEA_MEMBASE:
3366                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
3367                         break;
3368                 case OP_X86_XCHG:
3369                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
3370                         break;
3371                 case OP_LOCALLOC:
3372                         /* keep alignment */
3373                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
3374                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
3375                         code = mono_emit_stack_alloc (code, ins);
3376                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
3377                         break;
3378                 case CEE_RET:
3379                         x86_ret (code);
3380                         break;
3381                 case CEE_THROW: {
3382                         x86_push_reg (code, ins->sreg1);
3383                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3384                                                           (gpointer)"mono_arch_throw_exception");
3385                         break;
3386                 }
3387                 case OP_RETHROW: {
3388                         x86_push_reg (code, ins->sreg1);
3389                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3390                                                           (gpointer)"mono_arch_rethrow_exception");
3391                         break;
3392                 }
3393                 case OP_CALL_HANDLER: 
3394                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3395                         x86_call_imm (code, 0);
3396                         break;
3397                 case OP_LABEL:
3398                         ins->inst_c0 = code - cfg->native_code;
3399                         break;
3400                 case CEE_BR:
3401                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
3402                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
3403                         //break;
3404                         if (ins->flags & MONO_INST_BRLABEL) {
3405                                 if (ins->inst_i0->inst_c0) {
3406                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
3407                                 } else {
3408                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
3409                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3410                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
3411                                                 x86_jump8 (code, 0);
3412                                         else 
3413                                                 x86_jump32 (code, 0);
3414                                 }
3415                         } else {
3416                                 if (ins->inst_target_bb->native_offset) {
3417                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
3418                                 } else {
3419                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3420                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3421                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3422                                                 x86_jump8 (code, 0);
3423                                         else 
3424                                                 x86_jump32 (code, 0);
3425                                 } 
3426                         }
3427                         break;
3428                 case OP_BR_REG:
3429                         x86_jump_reg (code, ins->sreg1);
3430                         break;
3431                 case OP_CEQ:
3432                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3433                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3434                         break;
3435                 case OP_CLT:
3436                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
3437                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3438                         break;
3439                 case OP_CLT_UN:
3440                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3441                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3442                         break;
3443                 case OP_CGT:
3444                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
3445                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3446                         break;
3447                 case OP_CGT_UN:
3448                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3449                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3450                         break;
3451                 case OP_CNE:
3452                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
3453                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3454                         break;
3455                 case OP_COND_EXC_EQ:
3456                 case OP_COND_EXC_NE_UN:
3457                 case OP_COND_EXC_LT:
3458                 case OP_COND_EXC_LT_UN:
3459                 case OP_COND_EXC_GT:
3460                 case OP_COND_EXC_GT_UN:
3461                 case OP_COND_EXC_GE:
3462                 case OP_COND_EXC_GE_UN:
3463                 case OP_COND_EXC_LE:
3464                 case OP_COND_EXC_LE_UN:
3465                 case OP_COND_EXC_OV:
3466                 case OP_COND_EXC_NO:
3467                 case OP_COND_EXC_C:
3468                 case OP_COND_EXC_NC:
3469                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
3470                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3471                         break;
3472                 case CEE_BEQ:
3473                 case CEE_BNE_UN:
3474                 case CEE_BLT:
3475                 case CEE_BLT_UN:
3476                 case CEE_BGT:
3477                 case CEE_BGT_UN:
3478                 case CEE_BGE:
3479                 case CEE_BGE_UN:
3480                 case CEE_BLE:
3481                 case CEE_BLE_UN:
3482                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
3483                         break;
3484
3485                 /* floating point opcodes */
3486                 case OP_R8CONST: {
3487                         double d = *(double *)ins->inst_p0;
3488
3489                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
3490                                 x86_fldz (code);
3491                         } else if (d == 1.0) {
3492                                 x86_fld1 (code);
3493                         } else {
3494                                 if (cfg->compile_aot) {
3495                                         guint32 *val = (guint32*)&d;
3496                                         x86_push_imm (code, val [1]);
3497                                         x86_push_imm (code, val [0]);
3498                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
3499                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3500                                 }
3501                                 else {
3502                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
3503                                         x86_fld (code, NULL, TRUE);
3504                                 }
3505                         }
3506                         break;
3507                 }
3508                 case OP_R4CONST: {
3509                         float f = *(float *)ins->inst_p0;
3510
3511                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
3512                                 x86_fldz (code);
3513                         } else if (f == 1.0) {
3514                                 x86_fld1 (code);
3515                         } else {
3516                                 if (cfg->compile_aot) {
3517                                         guint32 val = *(guint32*)&f;
3518                                         x86_push_imm (code, val);
3519                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
3520                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3521                                 }
3522                                 else {
3523                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
3524                                         x86_fld (code, NULL, FALSE);
3525                                 }
3526                         }
3527                         break;
3528                 }
3529                 case OP_STORER8_MEMBASE_REG:
3530                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3531                         break;
3532                 case OP_LOADR8_SPILL_MEMBASE:
3533                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3534                         x86_fxch (code, 1);
3535                         break;
3536                 case OP_LOADR8_MEMBASE:
3537                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3538                         break;
3539                 case OP_STORER4_MEMBASE_REG:
3540                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3541                         break;
3542                 case OP_LOADR4_MEMBASE:
3543                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3544                         break;
3545                 case CEE_CONV_R4: /* FIXME: change precision */
3546                 case CEE_CONV_R8:
3547                         x86_push_reg (code, ins->sreg1);
3548                         x86_fild_membase (code, X86_ESP, 0, FALSE);
3549                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3550                         break;
3551                 case OP_X86_FP_LOAD_I8:
3552                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3553                         break;
3554                 case OP_X86_FP_LOAD_I4:
3555                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3556                         break;
3557                 case OP_FCONV_TO_I1:
3558                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3559                         break;
3560                 case OP_FCONV_TO_U1:
3561                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3562                         break;
3563                 case OP_FCONV_TO_I2:
3564                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3565                         break;
3566                 case OP_FCONV_TO_U2:
3567                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3568                         break;
3569                 case OP_FCONV_TO_I4:
3570                 case OP_FCONV_TO_I:
3571                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3572                         break;
3573                 case OP_FCONV_TO_I8:
3574                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3575                         x86_fnstcw_membase(code, X86_ESP, 0);
3576                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3577                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3578                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3579                         x86_fldcw_membase (code, X86_ESP, 2);
3580                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3581                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3582                         x86_pop_reg (code, ins->dreg);
3583                         x86_pop_reg (code, ins->unused);
3584                         x86_fldcw_membase (code, X86_ESP, 0);
3585                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3586                         break;
3587                 case OP_LCONV_TO_R_UN: { 
3588                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
3589                         guint8 *br;
3590
3591                         /* load 64bit integer to FP stack */
3592                         x86_push_imm (code, 0);
3593                         x86_push_reg (code, ins->sreg2);
3594                         x86_push_reg (code, ins->sreg1);
3595                         x86_fild_membase (code, X86_ESP, 0, TRUE);
3596                         /* store as 80bit FP value */
3597                         x86_fst80_membase (code, X86_ESP, 0);
3598                         
3599                         /* test if lreg is negative */
3600                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3601                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3602         
3603                         /* add correction constant mn */
3604                         x86_fld80_mem (code, mn);
3605                         x86_fld80_membase (code, X86_ESP, 0);
3606                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3607                         x86_fst80_membase (code, X86_ESP, 0);
3608
3609                         x86_patch (br, code);
3610
3611                         x86_fld80_membase (code, X86_ESP, 0);
3612                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
3613
3614                         break;
3615                 }
3616                 case OP_LCONV_TO_OVF_I: {
3617                         guint8 *br [3], *label [1];
3618
3619                         /* 
3620                          * Valid ints: 0xffffffff:80000000 to 0x00000000:7fffffff
3621                          */
3622                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3623
3624                         /* If the low word top bit is set, see if we are negative */
3625                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3626                         /* We are not negative (no top bit set), check for our top word to be zero */
3627                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3628                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3629                         label [0] = code;
3630
3631                         /* throw exception */
3632                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3633                         x86_jump32 (code, 0);
3634         
3635                         x86_patch (br [0], code);
3636                         /* our top bit is set, check that top word is 0xffffffff */
3637                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3638                 
3639                         x86_patch (br [1], code);
3640                         /* nope, emit exception */
3641                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3642                         x86_patch (br [2], label [0]);
3643
3644                         if (ins->dreg != ins->sreg1)
3645                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3646                         break;
3647                 }
3648                 case OP_FADD:
3649                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3650                         break;
3651                 case OP_FSUB:
3652                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3653                         break;          
3654                 case OP_FMUL:
3655                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3656                         break;          
3657                 case OP_FDIV:
3658                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3659                         break;          
3660                 case OP_FNEG:
3661                         x86_fchs (code);
3662                         break;          
3663                 case OP_SIN:
3664                         x86_fsin (code);
3665                         x86_fldz (code);
3666                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3667                         break;          
3668                 case OP_COS:
3669                         x86_fcos (code);
3670                         x86_fldz (code);
3671                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3672                         break;          
3673                 case OP_ABS:
3674                         x86_fabs (code);
3675                         break;          
3676                 case OP_TAN: {
3677                         /* 
3678                          * it really doesn't make sense to inline all this code,
3679                          * it's here just to show that things may not be as simple 
3680                          * as they appear.
3681                          */
3682                         guchar *check_pos, *end_tan, *pop_jump;
3683                         x86_push_reg (code, X86_EAX);
3684                         x86_fptan (code);
3685                         x86_fnstsw (code);
3686                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3687                         check_pos = code;
3688                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3689                         x86_fstp (code, 0); /* pop the 1.0 */
3690                         end_tan = code;
3691                         x86_jump8 (code, 0);
3692                         x86_fldpi (code);
3693                         x86_fp_op (code, X86_FADD, 0);
3694                         x86_fxch (code, 1);
3695                         x86_fprem1 (code);
3696                         x86_fstsw (code);
3697                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3698                         pop_jump = code;
3699                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3700                         x86_fstp (code, 1);
3701                         x86_fptan (code);
3702                         x86_patch (pop_jump, code);
3703                         x86_fstp (code, 0); /* pop the 1.0 */
3704                         x86_patch (check_pos, code);
3705                         x86_patch (end_tan, code);
3706                         x86_fldz (code);
3707                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3708                         x86_pop_reg (code, X86_EAX);
3709                         break;
3710                 }
3711                 case OP_ATAN:
3712                         x86_fld1 (code);
3713                         x86_fpatan (code);
3714                         x86_fldz (code);
3715                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3716                         break;          
3717                 case OP_SQRT:
3718                         x86_fsqrt (code);
3719                         break;          
3720                 case OP_X86_FPOP:
3721                         x86_fstp (code, 0);
3722                         break;          
3723                 case OP_FREM: {
3724                         guint8 *l1, *l2;
3725
3726                         x86_push_reg (code, X86_EAX);
3727                         /* we need to exchange ST(0) with ST(1) */
3728                         x86_fxch (code, 1);
3729
3730                         /* this requires a loop, because fprem sometimes 
3731                          * returns a partial remainder */
3732                         l1 = code;
3733                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3734                         /* x86_fprem1 (code); */
3735                         x86_fprem (code);
3736                         x86_fnstsw (code);
3737                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3738                         l2 = code + 2;
3739                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3740
3741                         /* pop result */
3742                         x86_fstp (code, 1);
3743
3744                         x86_pop_reg (code, X86_EAX);
3745                         break;
3746                 }
3747                 case OP_FCOMPARE:
3748                         if (cfg->opt & MONO_OPT_FCMOV) {
3749                                 x86_fcomip (code, 1);
3750                                 x86_fstp (code, 0);
3751                                 break;
3752                         }
3753                         /* this overwrites EAX */
3754                         EMIT_FPCOMPARE(code);
3755                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3756                         break;
3757                 case OP_FCEQ:
3758                         if (cfg->opt & MONO_OPT_FCMOV) {
3759                                 /* zeroing the register at the start results in 
3760                                  * shorter and faster code (we can also remove the widening op)
3761                                  */
3762                                 guchar *unordered_check;
3763                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3764                                 x86_fcomip (code, 1);
3765                                 x86_fstp (code, 0);
3766                                 unordered_check = code;
3767                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3768                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3769                                 x86_patch (unordered_check, code);
3770                                 break;
3771                         }
3772                         if (ins->dreg != X86_EAX) 
3773                                 x86_push_reg (code, X86_EAX);
3774
3775                         EMIT_FPCOMPARE(code);
3776                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3777                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3778                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3779                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3780
3781                         if (ins->dreg != X86_EAX) 
3782                                 x86_pop_reg (code, X86_EAX);
3783                         break;
3784                 case OP_FCLT:
3785                 case OP_FCLT_UN:
3786                         if (cfg->opt & MONO_OPT_FCMOV) {
3787                                 /* zeroing the register at the start results in 
3788                                  * shorter and faster code (we can also remove the widening op)
3789                                  */
3790                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3791                                 x86_fcomip (code, 1);
3792                                 x86_fstp (code, 0);
3793                                 if (ins->opcode == OP_FCLT_UN) {
3794                                         guchar *unordered_check = code;
3795                                         guchar *jump_to_end;
3796                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3797                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3798                                         jump_to_end = code;
3799                                         x86_jump8 (code, 0);
3800                                         x86_patch (unordered_check, code);
3801                                         x86_inc_reg (code, ins->dreg);
3802                                         x86_patch (jump_to_end, code);
3803                                 } else {
3804                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3805                                 }
3806                                 break;
3807                         }
3808                         if (ins->dreg != X86_EAX) 
3809                                 x86_push_reg (code, X86_EAX);
3810
3811                         EMIT_FPCOMPARE(code);
3812                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3813                         if (ins->opcode == OP_FCLT_UN) {
3814                                 guchar *is_not_zero_check, *end_jump;
3815                                 is_not_zero_check = code;
3816                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3817                                 end_jump = code;
3818                                 x86_jump8 (code, 0);
3819                                 x86_patch (is_not_zero_check, code);
3820                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3821
3822                                 x86_patch (end_jump, code);
3823                         }
3824                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3825                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3826
3827                         if (ins->dreg != X86_EAX) 
3828                                 x86_pop_reg (code, X86_EAX);
3829                         break;
3830                 case OP_FCGT:
3831                 case OP_FCGT_UN:
3832                         if (cfg->opt & MONO_OPT_FCMOV) {
3833                                 /* zeroing the register at the start results in 
3834                                  * shorter and faster code (we can also remove the widening op)
3835                                  */
3836                                 guchar *unordered_check;
3837                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3838                                 x86_fcomip (code, 1);
3839                                 x86_fstp (code, 0);
3840                                 if (ins->opcode == OP_FCGT) {
3841                                         unordered_check = code;
3842                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3843                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3844                                         x86_patch (unordered_check, code);
3845                                 } else {
3846                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3847                                 }
3848                                 break;
3849                         }
3850                         if (ins->dreg != X86_EAX) 
3851                                 x86_push_reg (code, X86_EAX);
3852
3853                         EMIT_FPCOMPARE(code);
3854                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3855                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3856                         if (ins->opcode == OP_FCGT_UN) {
3857                                 guchar *is_not_zero_check, *end_jump;
3858                                 is_not_zero_check = code;
3859                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3860                                 end_jump = code;
3861                                 x86_jump8 (code, 0);
3862                                 x86_patch (is_not_zero_check, code);
3863                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3864         
3865                                 x86_patch (end_jump, code);
3866                         }
3867                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3868                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3869
3870                         if (ins->dreg != X86_EAX) 
3871                                 x86_pop_reg (code, X86_EAX);
3872                         break;
3873                 case OP_FBEQ:
3874                         if (cfg->opt & MONO_OPT_FCMOV) {
3875                                 guchar *jump = code;
3876                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3877                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3878                                 x86_patch (jump, code);
3879                                 break;
3880                         }
3881                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3882                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3883                         break;
3884                 case OP_FBNE_UN:
3885                         /* Branch if C013 != 100 */
3886                         if (cfg->opt & MONO_OPT_FCMOV) {
3887                                 /* branch if !ZF or (PF|CF) */
3888                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3889                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3890                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3891                                 break;
3892                         }
3893                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3894                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3895                         break;
3896                 case OP_FBLT:
3897                         if (cfg->opt & MONO_OPT_FCMOV) {
3898                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3899                                 break;
3900                         }
3901                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3902                         break;
3903                 case OP_FBLT_UN:
3904                         if (cfg->opt & MONO_OPT_FCMOV) {
3905                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3906                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3907                                 break;
3908                         }
3909                         if (ins->opcode == OP_FBLT_UN) {
3910                                 guchar *is_not_zero_check, *end_jump;
3911                                 is_not_zero_check = code;
3912                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3913                                 end_jump = code;
3914                                 x86_jump8 (code, 0);
3915                                 x86_patch (is_not_zero_check, code);
3916                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3917
3918                                 x86_patch (end_jump, code);
3919                         }
3920                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3921                         break;
3922                 case OP_FBGT:
3923                 case OP_FBGT_UN:
3924                         if (cfg->opt & MONO_OPT_FCMOV) {
3925                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3926                                 break;
3927                         }
3928                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3929                         if (ins->opcode == OP_FBGT_UN) {
3930                                 guchar *is_not_zero_check, *end_jump;
3931                                 is_not_zero_check = code;
3932                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3933                                 end_jump = code;
3934                                 x86_jump8 (code, 0);
3935                                 x86_patch (is_not_zero_check, code);
3936                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3937
3938                                 x86_patch (end_jump, code);
3939                         }
3940                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3941                         break;
3942                 case OP_FBGE:
3943                         /* Branch if C013 == 100 or 001 */
3944                         if (cfg->opt & MONO_OPT_FCMOV) {
3945                                 guchar *br1;
3946
3947                                 /* skip branch if C1=1 */
3948                                 br1 = code;
3949                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3950                                 /* branch if (C0 | C3) = 1 */
3951                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3952                                 x86_patch (br1, code);
3953                                 break;
3954                         }
3955                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3956                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3957                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3958                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3959                         break;
3960                 case OP_FBGE_UN:
3961                         /* Branch if C013 == 000 */
3962                         if (cfg->opt & MONO_OPT_FCMOV) {
3963                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3964                                 break;
3965                         }
3966                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3967                         break;
3968                 case OP_FBLE:
3969                         /* Branch if C013=000 or 100 */
3970                         if (cfg->opt & MONO_OPT_FCMOV) {
3971                                 guchar *br1;
3972
3973                                 /* skip branch if C1=1 */
3974                                 br1 = code;
3975                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3976                                 /* branch if C0=0 */
3977                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3978                                 x86_patch (br1, code);
3979                                 break;
3980                         }
3981                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3982                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3983                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3984                         break;
3985                 case OP_FBLE_UN:
3986                         /* Branch if C013 != 001 */
3987                         if (cfg->opt & MONO_OPT_FCMOV) {
3988                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3989                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3990                                 break;
3991                         }
3992                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3993                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3994                         break;
3995                 case CEE_CKFINITE: {
3996                         x86_push_reg (code, X86_EAX);
3997                         x86_fxam (code);
3998                         x86_fnstsw (code);
3999                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
4000                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
4001                         x86_pop_reg (code, X86_EAX);
4002                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
4003                         break;
4004                 }
4005                 case OP_TLS_GET: {
4006                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
4007                         break;
4008                 }
4009                 case OP_ATOMIC_ADD_I4: {
4010                         int dreg = ins->dreg;
4011
4012                         if (dreg == ins->inst_basereg) {
4013                                 x86_push_reg (code, ins->sreg2);
4014                                 dreg = ins->sreg2;
4015                         } 
4016                         
4017                         if (dreg != ins->sreg2)
4018                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
4019
4020                         x86_prefix (code, X86_LOCK_PREFIX);
4021                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4022
4023                         if (dreg != ins->dreg) {
4024                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4025                                 x86_pop_reg (code, dreg);
4026                         }
4027
4028                         break;
4029                 }
4030                 case OP_ATOMIC_ADD_NEW_I4: {
4031                         int dreg = ins->dreg;
4032
4033                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
4034                         if (ins->sreg2 == dreg) {
4035                                 if (dreg == X86_EBX) {
4036                                         dreg = X86_EDI;
4037                                         if (ins->inst_basereg == X86_EDI)
4038                                                 dreg = X86_ESI;
4039                                 } else {
4040                                         dreg = X86_EBX;
4041                                         if (ins->inst_basereg == X86_EBX)
4042                                                 dreg = X86_EDI;
4043                                 }
4044                         } else if (ins->inst_basereg == dreg) {
4045                                 if (dreg == X86_EBX) {
4046                                         dreg = X86_EDI;
4047                                         if (ins->sreg2 == X86_EDI)
4048                                                 dreg = X86_ESI;
4049                                 } else {
4050                                         dreg = X86_EBX;
4051                                         if (ins->sreg2 == X86_EBX)
4052                                                 dreg = X86_EDI;
4053                                 }
4054                         }
4055
4056                         if (dreg != ins->dreg) {
4057                                 x86_push_reg (code, dreg);
4058                         }
4059
4060                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
4061                         x86_prefix (code, X86_LOCK_PREFIX);
4062                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4063                         /* dreg contains the old value, add with sreg2 value */
4064                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
4065                         
4066                         if (ins->dreg != dreg) {
4067                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4068                                 x86_pop_reg (code, dreg);
4069                         }
4070
4071                         break;
4072                 }
4073                 case OP_ATOMIC_EXCHANGE_I4: {
4074                         guchar *br[2];
4075                         int sreg2 = ins->sreg2;
4076                         int breg = ins->inst_basereg;
4077
4078                         /* cmpxchg uses eax as comperand, need to make sure we can use it
4079                          * hack to overcome limits in x86 reg allocator 
4080                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
4081                          */
4082                         if (ins->dreg != X86_EAX)
4083                                 x86_push_reg (code, X86_EAX);
4084                         
4085                         /* We need the EAX reg for the cmpxchg */
4086                         if (ins->sreg2 == X86_EAX) {
4087                                 x86_push_reg (code, X86_EDX);
4088                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
4089                                 sreg2 = X86_EDX;
4090                         }
4091
4092                         if (breg == X86_EAX) {
4093                                 x86_push_reg (code, X86_ESI);
4094                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
4095                                 breg = X86_ESI;
4096                         }
4097
4098                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
4099
4100                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
4101                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
4102                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
4103                         x86_patch (br [1], br [0]);
4104
4105                         if (breg != ins->inst_basereg)
4106                                 x86_pop_reg (code, X86_ESI);
4107
4108                         if (ins->dreg != X86_EAX) {
4109                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
4110                                 x86_pop_reg (code, X86_EAX);
4111                         }
4112
4113                         if (ins->sreg2 != sreg2)
4114                                 x86_pop_reg (code, X86_EDX);
4115
4116                         break;
4117                 }
4118                 default:
4119                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
4120                         g_assert_not_reached ();
4121                 }
4122
4123                 if ((code - cfg->native_code - offset) > max_len) {
4124                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4125                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4126                         g_assert_not_reached ();
4127                 }
4128                
4129                 cpos += max_len;
4130
4131                 last_ins = ins;
4132                 last_offset = offset;
4133                 
4134                 ins = ins->next;
4135         }
4136
4137         cfg->code_len = code - cfg->native_code;
4138 }
4139
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Architecture hook with an intentionally empty body: this backend does
 * nothing here.  It takes no arguments, returns nothing and touches no state.
 * NOTE(review): presumably invoked by the architecture independent JIT
 * start-up code; the caller is not visible from here.
 */
4140 void
4141 mono_arch_register_lowlevel_calls (void)
4142 {
4143 }
4144
4145 void
4146 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4147 {
4148         MonoJumpInfo *patch_info;
4149         gboolean compile_aot = !run_cctors;
4150
4151         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4152                 unsigned char *ip = patch_info->ip.i + code;
4153                 const unsigned char *target;
4154
4155                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4156
4157                 if (compile_aot) {
4158                         switch (patch_info->type) {
4159                         case MONO_PATCH_INFO_BB:
4160                         case MONO_PATCH_INFO_LABEL:
4161                                 break;
4162                         default:
4163                                 /* No need to patch these */
4164                                 continue;
4165                         }
4166                 }
4167
4168                 switch (patch_info->type) {
4169                 case MONO_PATCH_INFO_IP:
4170                         *((gconstpointer *)(ip)) = target;
4171                         break;
4172                 case MONO_PATCH_INFO_CLASS_INIT: {
4173                         guint8 *code = ip;
4174                         /* Might already been changed to a nop */
4175                         x86_call_code (code, 0);
4176                         x86_patch (ip, target);
4177                         break;
4178                 }
4179                 case MONO_PATCH_INFO_ABS:
4180                 case MONO_PATCH_INFO_METHOD:
4181                 case MONO_PATCH_INFO_METHOD_JUMP:
4182                 case MONO_PATCH_INFO_INTERNAL_METHOD:
4183                 case MONO_PATCH_INFO_BB:
4184                 case MONO_PATCH_INFO_LABEL:
4185                         x86_patch (ip, target);
4186                         break;
4187                 case MONO_PATCH_INFO_NONE:
4188                         break;
4189                 default: {
4190                         guint32 offset = mono_arch_get_patch_offset (ip);
4191                         *((gconstpointer *)(ip + offset)) = target;
4192                         break;
4193                 }
4194                 }
4195         }
4196 }
4197
4198 guint8 *
4199 mono_arch_emit_prolog (MonoCompile *cfg)
4200 {
4201         MonoMethod *method = cfg->method;
4202         MonoBasicBlock *bb;
4203         MonoMethodSignature *sig;
4204         MonoInst *inst;
4205         int alloc_size, pos, max_offset, i;
4206         guint8 *code;
4207
4208         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
4209         code = cfg->native_code = g_malloc (cfg->code_size);
4210
4211         x86_push_reg (code, X86_EBP);
4212         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
4213
4214         alloc_size = - cfg->stack_offset;
4215         pos = 0;
4216
4217         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
4218                 /* Might need to attach the thread to the JIT */
4219                 if (lmf_tls_offset != -1) {
4220                         guint8 *buf;
4221
4222                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
4223 #ifdef PLATFORM_WIN32
4224                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4225                         /* FIXME: Add a separate key for LMF to avoid this */
4226                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4227 #endif
4228                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
4229                         buf = code;
4230                         x86_branch8 (code, X86_CC_NE, 0, 0);
4231                         x86_push_imm (code, cfg->domain);
4232                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4233                         x86_patch (buf, code);
4234                 }
4235                 else {
4236                         g_assert (!cfg->compile_aot);
4237                         x86_push_imm (code, cfg->domain);
4238                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4239                 }
4240         }
4241
4242         if (method->save_lmf) {
4243                 pos += sizeof (MonoLMF);
4244
4245                 /* save the current IP */
4246                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
4247                 x86_push_imm_template (code);
4248
4249                 /* save all caller saved regs */
4250                 x86_push_reg (code, X86_EBP);
4251                 x86_push_reg (code, X86_ESI);
4252                 x86_push_reg (code, X86_EDI);
4253                 x86_push_reg (code, X86_EBX);
4254
4255                 /* save method info */
4256                 x86_push_imm (code, method);
4257
4258                 /* get the address of lmf for the current thread */
4259                 /* 
4260                  * This is performance critical so we try to use some tricks to make
4261                  * it fast.
4262                  */
4263                 if (lmf_tls_offset != -1) {
4264                         /* Load lmf quicky using the GS register */
4265                         code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
4266 #ifdef PLATFORM_WIN32
4267                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4268                         /* FIXME: Add a separate key for LMF to avoid this */
4269                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4270 #endif
4271                 }
4272                 else {
4273                         if (cfg->compile_aot) {
4274                                 /* The GOT var does not exist yet */
4275                                 x86_call_imm (code, 0);
4276                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
4277                                 x86_pop_reg (code, X86_EAX);
4278                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
4279                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
4280                                 x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
4281                         }
4282                         else
4283                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
4284                 }
4285
4286                 /* push lmf */
4287                 x86_push_reg (code, X86_EAX); 
4288                 /* push *lfm (previous_lmf) */
4289                 x86_push_membase (code, X86_EAX, 0);
4290                 /* *(lmf) = ESP */
4291                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
4292         } else {
4293
4294                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4295                         x86_push_reg (code, X86_EBX);
4296                         pos += 4;
4297                 }
4298
4299                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4300                         x86_push_reg (code, X86_EDI);
4301                         pos += 4;
4302                 }
4303
4304                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4305                         x86_push_reg (code, X86_ESI);
4306                         pos += 4;
4307                 }
4308         }
4309
4310         alloc_size -= pos;
4311
4312         if (alloc_size) {
4313                 /* See mono_emit_stack_alloc */
4314 #ifdef PLATFORM_WIN32
4315                 guint32 remaining_size = alloc_size;
4316                 while (remaining_size >= 0x1000) {
4317                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
4318                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
4319                         remaining_size -= 0x1000;
4320                 }
4321                 if (remaining_size)
4322                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
4323 #else
4324                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
4325 #endif
4326         }
4327
4328         /* compute max_offset in order to use short forward jumps */
4329         max_offset = 0;
4330         if (cfg->opt & MONO_OPT_BRANCH) {
4331                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4332                         MonoInst *ins = bb->code;
4333                         bb->max_offset = max_offset;
4334
4335                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4336                                 max_offset += 6;
4337                         /* max alignment for loops */
4338                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4339                                 max_offset += LOOP_ALIGNMENT;
4340
4341                         while (ins) {
4342                                 if (ins->opcode == OP_LABEL)
4343                                         ins->inst_c1 = max_offset;
4344                                 
4345                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
4346                                 ins = ins->next;
4347                         }
4348                 }
4349         }
4350
4351         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4352                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4353
4354         /* load arguments allocated to register from the stack */
4355         sig = mono_method_signature (method);
4356         pos = 0;
4357
4358         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4359                 inst = cfg->varinfo [pos];
4360                 if (inst->opcode == OP_REGVAR) {
4361                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
4362                         if (cfg->verbose_level > 2)
4363                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
4364                 }
4365                 pos++;
4366         }
4367
4368         cfg->code_len = code - cfg->native_code;
4369
4370         return code;
4371 }
4372
4373 void
4374 mono_arch_emit_epilog (MonoCompile *cfg)
4375 {
4376         MonoMethod *method = cfg->method;
4377         MonoMethodSignature *sig = mono_method_signature (method);
4378         int quad, pos;
4379         guint32 stack_to_pop;
4380         guint8 *code;
4381         int max_epilog_size = 16;
4382         CallInfo *cinfo;
4383         
4384         if (cfg->method->save_lmf)
4385                 max_epilog_size += 128;
4386         
4387         if (mono_jit_trace_calls != NULL)
4388                 max_epilog_size += 50;
4389
4390         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
4391                 cfg->code_size *= 2;
4392                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4393                 mono_jit_stats.code_reallocs++;
4394         }
4395
4396         code = cfg->native_code + cfg->code_len;
4397
4398         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4399                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4400
4401         /* the code restoring the registers must be kept in sync with CEE_JMP */
4402         pos = 0;
4403         
4404         if (method->save_lmf) {
4405                 gint32 prev_lmf_reg;
4406
4407                 /* Find a spare register */
4408                 switch (sig->ret->type) {
4409                 case MONO_TYPE_I8:
4410                 case MONO_TYPE_U8:
4411                         prev_lmf_reg = X86_EDI;
4412                         cfg->used_int_regs |= (1 << X86_EDI);
4413                         break;
4414                 default:
4415                         prev_lmf_reg = X86_EDX;
4416                         break;
4417                 }
4418
4419                 /* reg = previous_lmf */
4420                 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);
4421
4422                 /* ecx = lmf */
4423                 x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);
4424
4425                 /* *(lmf) = previous_lmf */
4426                 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
4427
4428                 /* restore caller saved regs */
4429                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4430                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
4431                 }
4432
4433                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4434                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
4435                 }
4436                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4437                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
4438                 }
4439
4440                 /* EBP is restored by LEAVE */
4441         } else {
4442                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4443                         pos -= 4;
4444                 }
4445                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4446                         pos -= 4;
4447                 }
4448                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4449                         pos -= 4;
4450                 }
4451
4452                 if (pos)
4453                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
4454
4455                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4456                         x86_pop_reg (code, X86_ESI);
4457                 }
4458                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4459                         x86_pop_reg (code, X86_EDI);
4460                 }
4461                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4462                         x86_pop_reg (code, X86_EBX);
4463                 }
4464         }
4465
4466         /* Load returned vtypes into registers if needed */
4467         cinfo = get_call_info (sig, FALSE);
4468         if (cinfo->ret.storage == ArgValuetypeInReg) {
4469                 for (quad = 0; quad < 2; quad ++) {
4470                         switch (cinfo->ret.pair_storage [quad]) {
4471                         case ArgInIReg:
4472                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
4473                                 break;
4474                         case ArgOnFloatFpStack:
4475                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
4476                                 break;
4477                         case ArgOnDoubleFpStack:
4478                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
4479                                 break;
4480                         case ArgNone:
4481                                 break;
4482                         default:
4483                                 g_assert_not_reached ();
4484                         }
4485                 }
4486         }
4487
4488         x86_leave (code);
4489
4490         if (CALLCONV_IS_STDCALL (sig)) {
4491                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
4492
4493                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
4494         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
4495                 stack_to_pop = 4;
4496         else
4497                 stack_to_pop = 0;
4498
4499         if (stack_to_pop)
4500                 x86_ret_imm (code, stack_to_pop);
4501         else
4502                 x86_ret (code);
4503
4504         g_free (cinfo);
4505
4506         cfg->code_len = code - cfg->native_code;
4507
4508         g_assert (cfg->code_len < cfg->code_size);
4509 }
4510
4511 void
4512 mono_arch_emit_exceptions (MonoCompile *cfg)
4513 {
4514         MonoJumpInfo *patch_info;
4515         int nthrows, i;
4516         guint8 *code;
4517         MonoClass *exc_classes [16];
4518         guint8 *exc_throw_start [16], *exc_throw_end [16];
4519         guint32 code_size;
4520         int exc_count = 0;
4521
4522         /* Compute needed space */
4523         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4524                 if (patch_info->type == MONO_PATCH_INFO_EXC)
4525                         exc_count++;
4526         }
4527
4528         /* 
4529          * make sure we have enough space for exceptions
4530          * 16 is the size of two push_imm instructions and a call
4531          */
4532         if (cfg->compile_aot)
4533                 code_size = exc_count * 32;
4534         else
4535                 code_size = exc_count * 16;
4536
4537         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4538                 cfg->code_size *= 2;
4539                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4540                 mono_jit_stats.code_reallocs++;
4541         }
4542
4543         code = cfg->native_code + cfg->code_len;
4544
4545         nthrows = 0;
4546         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4547                 switch (patch_info->type) {
4548                 case MONO_PATCH_INFO_EXC: {
4549                         MonoClass *exc_class;
4550                         guint8 *buf, *buf2;
4551                         guint32 throw_ip;
4552
4553                         x86_patch (patch_info->ip.i + cfg->native_code, code);
4554
4555                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4556                         g_assert (exc_class);
4557                         throw_ip = patch_info->ip.i;
4558
4559                         /* Find a throw sequence for the same exception class */
4560                         for (i = 0; i < nthrows; ++i)
4561                                 if (exc_classes [i] == exc_class)
4562                                         break;
4563                         if (i < nthrows) {
4564                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4565                                 x86_jump_code (code, exc_throw_start [i]);
4566                                 patch_info->type = MONO_PATCH_INFO_NONE;
4567                         }
4568                         else {
4569                                 guint32 got_reg = X86_EAX;
4570                                 guint32 size;
4571
4572                                 /* Compute size of code following the push <OFFSET> */
4573                                 if (cfg->compile_aot) {
4574                                         size = 5 + 6;
4575                                         if (!cfg->got_var)
4576                                                 size += 32;
4577                                         else if (cfg->got_var->opcode == OP_REGOFFSET)
4578                                                 size += 6;
4579                                 }
4580                                 else
4581                                         size = 5 + 5;
4582
4583                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
4584                                         /* Use the shorter form */
4585                                         buf = buf2 = code;
4586                                         x86_push_imm (code, 0);
4587                                 }
4588                                 else {
4589                                         buf = code;
4590                                         x86_push_imm (code, 0xf0f0f0f0);
4591                                         buf2 = code;
4592                                 }
4593
4594                                 if (nthrows < 16) {
4595                                         exc_classes [nthrows] = exc_class;
4596                                         exc_throw_start [nthrows] = code;
4597                                 }
4598
4599                                 if (cfg->compile_aot) {          
4600                                         /*
4601                                          * Since the patches are generated by the back end, there is                                     * no way to generate a got_var at this point.   
4602                                          */
4603                                         if (!cfg->got_var) {
4604                                                 x86_call_imm (code, 0);
4605                                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
4606                                                 x86_pop_reg (code, X86_EAX);
4607                                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
4608                                         }
4609                                         else {
4610                                                 if (cfg->got_var->opcode == OP_REGOFFSET)
4611                                                         x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
4612                                                 else
4613                                                         got_reg = cfg->got_var->dreg;
4614                                         }
4615                                 }
4616
4617                                 x86_push_imm (code, exc_class->type_token);
4618                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
4619                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4620                                 patch_info->ip.i = code - cfg->native_code;
4621                                 if (cfg->compile_aot)
4622                                         x86_call_membase (code, got_reg, 0xf0f0f0f0);
4623                                 else
4624                                         x86_call_code (code, 0);
4625                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
4626                                 while (buf < buf2)
4627                                         x86_nop (buf);
4628
4629                                 if (nthrows < 16) {
4630                                         exc_throw_end [nthrows] = code;
4631                                         nthrows ++;
4632                                 }
4633                         }
4634                         break;
4635                 }
4636                 default:
4637                         /* do nothing */
4638                         break;
4639                 }
4640         }
4641
4642         cfg->code_len = code - cfg->native_code;
4643
4644         g_assert (cfg->code_len < cfg->code_size);
4645 }
4646
4647 void
4648 mono_arch_flush_icache (guint8 *code, gint size)
4649 {
4650         /* not needed */
4651 }
4652
/*
 * mono_arch_flush_register_windows:
 *
 *   Does nothing in this backend.
 */
void
mono_arch_flush_register_windows (void)
{
        /* nothing to do */
}
4657
4658 /*
4659  * Support for fast access to the thread-local lmf structure using the GS
4660  * segment register on NPTL + kernel 2.6.x.
4661  */
4662
4663 static gboolean tls_offset_inited = FALSE;
4664
4665 void
4666 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4667 {
4668 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4669         pthread_t self = pthread_self();
4670         pthread_attr_t attr;
4671         void *staddr = NULL;
4672         size_t stsize = 0;
4673         struct sigaltstack sa;
4674 #endif
4675
4676         if (!tls_offset_inited) {
4677                 if (!getenv ("MONO_NO_TLS")) {
4678 #ifdef PLATFORM_WIN32
4679                         /* 
4680                          * We need to init this multiple times, since when we are first called, the key might not
4681                          * be initialized yet.
4682                          */
4683                         appdomain_tls_offset = mono_domain_get_tls_key ();
4684                         lmf_tls_offset = mono_get_jit_tls_key ();
4685                         thread_tls_offset = mono_thread_get_tls_key ();
4686
4687                         /* Only 64 tls entries can be accessed using inline code */
4688                         if (appdomain_tls_offset >= 64)
4689                                 appdomain_tls_offset = -1;
4690                         if (lmf_tls_offset >= 64)
4691                                 lmf_tls_offset = -1;
4692                         if (thread_tls_offset >= 64)
4693                                 thread_tls_offset = -1;
4694 #else
4695                         tls_offset_inited = TRUE;
4696                         appdomain_tls_offset = mono_domain_get_tls_offset ();
4697                         lmf_tls_offset = mono_get_lmf_tls_offset ();
4698                         thread_tls_offset = mono_thread_get_tls_offset ();
4699 #endif
4700                 }
4701         }               
4702
4703 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4704
4705         /* Determine stack boundaries */
4706         if (!mono_running_on_valgrind ()) {
4707 #ifdef HAVE_PTHREAD_GETATTR_NP
4708                 pthread_getattr_np( self, &attr );
4709 #else
4710 #ifdef HAVE_PTHREAD_ATTR_GET_NP
4711                 pthread_attr_get_np( self, &attr );
4712 #elif defined(sun)
4713                 pthread_attr_init( &attr );
4714                 pthread_attr_getstacksize( &attr, &stsize );
4715 #else
4716 #error "Not implemented"
4717 #endif
4718 #endif
4719 #ifndef sun
4720                 pthread_attr_getstack( &attr, &staddr, &stsize );
4721 #endif
4722         }
4723
4724         /* 
4725          * staddr seems to be wrong for the main thread, so we keep the value in
4726          * tls->end_of_stack
4727          */
4728         tls->stack_size = stsize;
4729
4730         /* Setup an alternate signal stack */
4731         tls->signal_stack = g_malloc (SIGNAL_STACK_SIZE);
4732         tls->signal_stack_size = SIGNAL_STACK_SIZE;
4733
4734         sa.ss_sp = tls->signal_stack;
4735         sa.ss_size = SIGNAL_STACK_SIZE;
4736         sa.ss_flags = SS_ONSTACK;
4737         sigaltstack (&sa, NULL);
4738 #endif
4739 }
4740
4741 void
4742 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4743 {
4744 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4745         struct sigaltstack sa;
4746
4747         sa.ss_sp = tls->signal_stack;
4748         sa.ss_size = SIGNAL_STACK_SIZE;
4749         sa.ss_flags = SS_DISABLE;
4750         sigaltstack  (&sa, NULL);
4751
4752         if (tls->signal_stack)
4753                 g_free (tls->signal_stack);
4754 #endif
4755 }
4756
4757 void
4758 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4759 {
4760
4761         /* add the this argument */
4762         if (this_reg != -1) {
4763                 MonoInst *this;
4764                 MONO_INST_NEW (cfg, this, OP_OUTARG);
4765                 this->type = this_type;
4766                 this->sreg1 = this_reg;
4767                 mono_bblock_add_inst (cfg->cbb, this);
4768         }
4769
4770         if (vt_reg != -1) {
4771                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
4772                 MonoInst *vtarg;
4773
4774                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4775                         /*
4776                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4777                          * the stack. Save the address here, so the call instruction can
4778                          * access it.
4779                          */
4780                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4781                         vtarg->inst_destbasereg = X86_ESP;
4782                         vtarg->inst_offset = inst->stack_usage;
4783                         vtarg->sreg1 = vt_reg;
4784                         mono_bblock_add_inst (cfg->cbb, vtarg);
4785                 }
4786                 else {
4787                         MonoInst *vtarg;
4788                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4789                         vtarg->type = STACK_MP;
4790                         vtarg->sreg1 = vt_reg;
4791                         mono_bblock_add_inst (cfg->cbb, vtarg);
4792                 }
4793
4794                 g_free (cinfo);
4795         }
4796 }
4797
4798
4799 MonoInst*
4800 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4801 {
4802         MonoInst *ins = NULL;
4803
4804         if (cmethod->klass == mono_defaults.math_class) {
4805                 if (strcmp (cmethod->name, "Sin") == 0) {
4806                         MONO_INST_NEW (cfg, ins, OP_SIN);
4807                         ins->inst_i0 = args [0];
4808                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4809                         MONO_INST_NEW (cfg, ins, OP_COS);
4810                         ins->inst_i0 = args [0];
4811                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4812                         MONO_INST_NEW (cfg, ins, OP_TAN);
4813                         ins->inst_i0 = args [0];
4814                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4815                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4816                         ins->inst_i0 = args [0];
4817                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4818                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4819                         ins->inst_i0 = args [0];
4820                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4821                         MONO_INST_NEW (cfg, ins, OP_ABS);
4822                         ins->inst_i0 = args [0];
4823                 }
4824 #if 0
4825                 /* OP_FREM is not IEEE compatible */
4826                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4827                         MONO_INST_NEW (cfg, ins, OP_FREM);
4828                         ins->inst_i0 = args [0];
4829                         ins->inst_i1 = args [1];
4830                 }
4831 #endif
4832         } else if(cmethod->klass->image == mono_defaults.corlib &&
4833                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4834                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4835
4836                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4837                         MonoInst *ins_iconst;
4838
4839                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4840                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4841                         ins_iconst->inst_c0 = 1;
4842
4843                         ins->inst_i0 = args [0];
4844                         ins->inst_i1 = ins_iconst;
4845                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4846                         MonoInst *ins_iconst;
4847
4848                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4849                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4850                         ins_iconst->inst_c0 = -1;
4851
4852                         ins->inst_i0 = args [0];
4853                         ins->inst_i1 = ins_iconst;
4854                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4855                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4856
4857                         ins->inst_i0 = args [0];
4858                         ins->inst_i1 = args [1];
4859                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4860                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
4861
4862                         ins->inst_i0 = args [0];
4863                         ins->inst_i1 = args [1];
4864                 }
4865         }
4866
4867         return ins;
4868 }
4869
4870
4871 gboolean
4872 mono_arch_print_tree (MonoInst *tree, int arity)
4873 {
4874         return 0;
4875 }
4876
4877 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4878 {
4879         MonoInst* ins;
4880         
4881         if (appdomain_tls_offset == -1)
4882                 return NULL;
4883
4884         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4885         ins->inst_offset = appdomain_tls_offset;
4886         return ins;
4887 }
4888
4889 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4890 {
4891         MonoInst* ins;
4892
4893         if (thread_tls_offset == -1)
4894                 return NULL;
4895
4896         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4897         ins->inst_offset = thread_tls_offset;
4898         return ins;
4899 }
4900
4901 guint32
4902 mono_arch_get_patch_offset (guint8 *code)
4903 {
4904         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4905                 return 2;
4906         else if ((code [0] == 0xba))
4907                 return 1;
4908         else if ((code [0] == 0x68))
4909                 /* push IMM */
4910                 return 1;
4911         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4912                 /* push <OFFSET>(<REG>) */
4913                 return 2;
4914         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4915                 /* call *<OFFSET>(<REG>) */
4916                 return 2;
4917         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4918                 /* fldl <ADDR> */
4919                 return 2;
4920         else if ((code [0] == 0x58) && (code [1] == 0x05))
4921                 /* pop %eax; add <OFFSET>, %eax */
4922                 return 2;
4923         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4924                 /* pop <REG>; add <OFFSET>, <REG> */
4925                 return 3;
4926         else {
4927                 g_assert_not_reached ();
4928                 return -1;
4929         }
4930 }
4931
4932 gpointer*
4933 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
4934 {
4935         guint8 reg = 0;
4936         gint32 disp = 0;
4937
4938         /* go to the start of the call instruction
4939          *
4940          * address_byte = (m << 6) | (o << 3) | reg
4941          * call opcode: 0xff address_byte displacement
4942          * 0xff m=1,o=2 imm8
4943          * 0xff m=2,o=2 imm32
4944          */
4945         code -= 6;
4946         if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
4947                 reg = code [4] & 0x07;
4948                 disp = (signed char)code [5];
4949         } else {
4950                 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
4951                         reg = code [1] & 0x07;
4952                         disp = *((gint32*)(code + 2));
4953                 } else if ((code [1] == 0xe8)) {
4954                         return NULL;
4955                 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
4956                         /*
4957                          * This is a interface call: should check the above code can't catch it earlier 
4958                          * 8b 40 30   mov    0x30(%eax),%eax
4959                          * ff 10      call   *(%eax)
4960                          */
4961                         disp = 0;
4962                         reg = code [5] & 0x07;
4963                 }
4964                 else
4965                         return NULL;
4966         }
4967
4968         return (gpointer*)(((gint32)(regs [reg])) + disp);
4969 }
4970
4971 gpointer* 
4972 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
4973 {
4974         guint8 reg = 0;
4975         gint32 disp = 0;
4976
4977         code -= 7;
4978         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
4979                 reg = x86_modrm_rm (code [1]);
4980                 disp = code [4];
4981
4982                 if (reg == X86_EAX)
4983                         return NULL;
4984                 else
4985                         return (gpointer*)(((gint32)(regs [reg])) + disp);
4986         }
4987
4988         return NULL;
4989 }