2005-07-06 Zoltan Varga <vargaz@freemail.hu>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #ifndef PLATFORM_WIN32
16 #include <unistd.h>
17 #include <sys/mman.h>
18 #endif
19
20 #include <mono/metadata/appdomain.h>
21 #include <mono/metadata/debug-helpers.h>
22 #include <mono/metadata/threads.h>
23 #include <mono/metadata/profiler-private.h>
24 #include <mono/utils/mono-math.h>
25
26 #include "trace.h"
27 #include "mini-x86.h"
28 #include "inssel.h"
29 #include "cpu-pentium.h"
30
31 /* On windows, these hold the key returned by TlsAlloc () */
32 static gint lmf_tls_offset = -1;
33 static gint appdomain_tls_offset = -1;
34 static gint thread_tls_offset = -1;
35
36 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
37
38 #ifdef PLATFORM_WIN32
39 /* Under windows, the default pinvoke calling convention is stdcall */
40 #define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
41 #else
42 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
43 #endif
44
45 #define SIGNAL_STACK_SIZE (64 * 1024)
46
47 #define NOT_IMPLEMENTED g_assert_not_reached ()
48
49 const char*
50 mono_arch_regname (int reg) {
51         switch (reg) {
52         case X86_EAX: return "%eax";
53         case X86_EBX: return "%ebx";
54         case X86_ECX: return "%ecx";
55         case X86_EDX: return "%edx";
56         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
57         case X86_EDI: return "%edi";
58         case X86_ESI: return "%esi";
59         }
60         return "unknown";
61 }
62
63 typedef enum {
64         ArgInIReg,
65         ArgInFloatSSEReg,
66         ArgInDoubleSSEReg,
67         ArgOnStack,
68         ArgValuetypeInReg,
69         ArgOnFloatFpStack,
70         ArgOnDoubleFpStack,
71         ArgNone
72 } ArgStorage;
73
74 typedef struct {
75         gint16 offset;
76         gint8  reg;
77         ArgStorage storage;
78
79         /* Only if storage == ArgValuetypeInReg */
80         ArgStorage pair_storage [2];
81         gint8 pair_regs [2];
82 } ArgInfo;
83
84 typedef struct {
85         int nargs;
86         guint32 stack_usage;
87         guint32 reg_usage;
88         guint32 freg_usage;
89         gboolean need_stack_align;
90         ArgInfo ret;
91         ArgInfo sig_cookie;
92         ArgInfo args [1];
93 } CallInfo;
94
95 #define PARAM_REGS 0
96
97 #define FLOAT_PARAM_REGS 0
98
99 static X86_Reg_No param_regs [] = { 0 };
100
101 #ifdef PLATFORM_WIN32
102 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
103 #endif
104
105 static void inline
106 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
107 {
108     ainfo->offset = *stack_size;
109
110     if (*gr >= PARAM_REGS) {
111                 ainfo->storage = ArgOnStack;
112                 (*stack_size) += sizeof (gpointer);
113     }
114     else {
115                 ainfo->storage = ArgInIReg;
116                 ainfo->reg = param_regs [*gr];
117                 (*gr) ++;
118     }
119 }
120
121 static void inline
122 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
123 {
124         ainfo->offset = *stack_size;
125
126         g_assert (PARAM_REGS == 0);
127         
128         ainfo->storage = ArgOnStack;
129         (*stack_size) += sizeof (gpointer) * 2;
130 }
131
132 static void inline
133 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
134 {
135     ainfo->offset = *stack_size;
136
137     if (*gr >= FLOAT_PARAM_REGS) {
138                 ainfo->storage = ArgOnStack;
139                 (*stack_size) += sizeof (gpointer);
140     }
141     else {
142                 /* A double register */
143                 if (is_double)
144                         ainfo->storage = ArgInDoubleSSEReg;
145                 else
146                         ainfo->storage = ArgInFloatSSEReg;
147                 ainfo->reg = *gr;
148                 (*gr) += 1;
149     }
150 }
151
152
153 static void
154 add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
155                gboolean is_return,
156                guint32 *gr, guint32 *fr, guint32 *stack_size)
157 {
158         guint32 size;
159         MonoClass *klass;
160
161         klass = mono_class_from_mono_type (type);
162         if (sig->pinvoke) 
163                 size = mono_type_native_stack_size (&klass->byval_arg, NULL);
164         else 
165                 size = mono_type_stack_size (&klass->byval_arg, NULL);
166
167 #ifdef PLATFORM_WIN32
168         if (sig->pinvoke && is_return) {
169                 MonoMarshalType *info;
170
171                 /*
172                  * the exact rules are not very well documented, the code below seems to work with the 
173                  * code generated by gcc 3.3.3 -mno-cygwin.
174                  */
175                 info = mono_marshal_load_type_info (klass);
176                 g_assert (info);
177
178                 ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
179
180                 /* Special case structs with only a float member */
181                 if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
182                         ainfo->storage = ArgValuetypeInReg;
183                         ainfo->pair_storage [0] = ArgOnDoubleFpStack;
184                         return;
185                 }
186                 if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
187                         ainfo->storage = ArgValuetypeInReg;
188                         ainfo->pair_storage [0] = ArgOnFloatFpStack;
189                         return;
190                 }               
191                 if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
192                         ainfo->storage = ArgValuetypeInReg;
193                         ainfo->pair_storage [0] = ArgInIReg;
194                         ainfo->pair_regs [0] = return_regs [0];
195                         if (info->native_size > 4) {
196                                 ainfo->pair_storage [1] = ArgInIReg;
197                                 ainfo->pair_regs [1] = return_regs [1];
198                         }
199                         return;
200                 }
201         }
202 #endif
203
204         ainfo->offset = *stack_size;
205         ainfo->storage = ArgOnStack;
206         *stack_size += ALIGN_TO (size, sizeof (gpointer));
207 }
208
209 /*
210  * get_call_info:
211  *
212  *  Obtain information about a call according to the calling convention.
213  * For x86 ELF, see the "System V Application Binary Interface Intel386 
214  * Architecture Processor Supplment, Fourth Edition" document for more
215  * information.
216  * For x86 win32, see ???.
217  */
218 static CallInfo*
219 get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
220 {
221         guint32 i, gr, fr;
222         MonoType *ret_type;
223         int n = sig->hasthis + sig->param_count;
224         guint32 stack_size = 0;
225         CallInfo *cinfo;
226
227         cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
228
229         gr = 0;
230         fr = 0;
231
232         /* return value */
233         {
234                 ret_type = mono_type_get_underlying_type (sig->ret);
235                 switch (ret_type->type) {
236                 case MONO_TYPE_BOOLEAN:
237                 case MONO_TYPE_I1:
238                 case MONO_TYPE_U1:
239                 case MONO_TYPE_I2:
240                 case MONO_TYPE_U2:
241                 case MONO_TYPE_CHAR:
242                 case MONO_TYPE_I4:
243                 case MONO_TYPE_U4:
244                 case MONO_TYPE_I:
245                 case MONO_TYPE_U:
246                 case MONO_TYPE_PTR:
247                 case MONO_TYPE_FNPTR:
248                 case MONO_TYPE_CLASS:
249                 case MONO_TYPE_OBJECT:
250                 case MONO_TYPE_SZARRAY:
251                 case MONO_TYPE_ARRAY:
252                 case MONO_TYPE_STRING:
253                         cinfo->ret.storage = ArgInIReg;
254                         cinfo->ret.reg = X86_EAX;
255                         break;
256                 case MONO_TYPE_U8:
257                 case MONO_TYPE_I8:
258                         cinfo->ret.storage = ArgInIReg;
259                         cinfo->ret.reg = X86_EAX;
260                         break;
261                 case MONO_TYPE_R4:
262                         cinfo->ret.storage = ArgOnFloatFpStack;
263                         break;
264                 case MONO_TYPE_R8:
265                         cinfo->ret.storage = ArgOnDoubleFpStack;
266                         break;
267                 case MONO_TYPE_VALUETYPE: {
268                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
269
270                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
271                         if (cinfo->ret.storage == ArgOnStack)
272                                 /* The caller passes the address where the value is stored */
273                                 add_general (&gr, &stack_size, &cinfo->ret);
274                         break;
275                 }
276                 case MONO_TYPE_TYPEDBYREF:
277                         /* Same as a valuetype with size 24 */
278                         add_general (&gr, &stack_size, &cinfo->ret);
279                         ;
280                         break;
281                 case MONO_TYPE_VOID:
282                         cinfo->ret.storage = ArgNone;
283                         break;
284                 default:
285                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
286                 }
287         }
288
289         /* this */
290         if (sig->hasthis)
291                 add_general (&gr, &stack_size, cinfo->args + 0);
292
293         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
294                 gr = PARAM_REGS;
295                 fr = FLOAT_PARAM_REGS;
296                 
297                 /* Emit the signature cookie just before the implicit arguments */
298                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
299         }
300
301         for (i = 0; i < sig->param_count; ++i) {
302                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
303                 MonoType *ptype;
304
305                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
306                         /* We allways pass the sig cookie on the stack for simplicity */
307                         /* 
308                          * Prevent implicit arguments + the sig cookie from being passed 
309                          * in registers.
310                          */
311                         gr = PARAM_REGS;
312                         fr = FLOAT_PARAM_REGS;
313
314                         /* Emit the signature cookie just before the implicit arguments */
315                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
316                 }
317
318                 if (sig->params [i]->byref) {
319                         add_general (&gr, &stack_size, ainfo);
320                         continue;
321                 }
322                 ptype = mono_type_get_underlying_type (sig->params [i]);
323                 switch (ptype->type) {
324                 case MONO_TYPE_BOOLEAN:
325                 case MONO_TYPE_I1:
326                 case MONO_TYPE_U1:
327                         add_general (&gr, &stack_size, ainfo);
328                         break;
329                 case MONO_TYPE_I2:
330                 case MONO_TYPE_U2:
331                 case MONO_TYPE_CHAR:
332                         add_general (&gr, &stack_size, ainfo);
333                         break;
334                 case MONO_TYPE_I4:
335                 case MONO_TYPE_U4:
336                         add_general (&gr, &stack_size, ainfo);
337                         break;
338                 case MONO_TYPE_I:
339                 case MONO_TYPE_U:
340                 case MONO_TYPE_PTR:
341                 case MONO_TYPE_FNPTR:
342                 case MONO_TYPE_CLASS:
343                 case MONO_TYPE_OBJECT:
344                 case MONO_TYPE_STRING:
345                 case MONO_TYPE_SZARRAY:
346                 case MONO_TYPE_ARRAY:
347                         add_general (&gr, &stack_size, ainfo);
348                         break;
349                 case MONO_TYPE_VALUETYPE:
350                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
351                         break;
352                 case MONO_TYPE_TYPEDBYREF:
353                         stack_size += sizeof (MonoTypedRef);
354                         ainfo->storage = ArgOnStack;
355                         break;
356                 case MONO_TYPE_U8:
357                 case MONO_TYPE_I8:
358                         add_general_pair (&gr, &stack_size, ainfo);
359                         break;
360                 case MONO_TYPE_R4:
361                         add_float (&fr, &stack_size, ainfo, FALSE);
362                         break;
363                 case MONO_TYPE_R8:
364                         add_float (&fr, &stack_size, ainfo, TRUE);
365                         break;
366                 default:
367                         g_error ("unexpected type 0x%x", ptype->type);
368                         g_assert_not_reached ();
369                 }
370         }
371
372         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
373                 gr = PARAM_REGS;
374                 fr = FLOAT_PARAM_REGS;
375                 
376                 /* Emit the signature cookie just before the implicit arguments */
377                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
378         }
379
380         cinfo->stack_usage = stack_size;
381         cinfo->reg_usage = gr;
382         cinfo->freg_usage = fr;
383         return cinfo;
384 }
385
386 /*
387  * mono_arch_get_argument_info:
388  * @csig:  a method signature
389  * @param_count: the number of parameters to consider
390  * @arg_info: an array to store the result infos
391  *
392  * Gathers information on parameters such as size, alignment and
393  * padding. arg_info should be large enought to hold param_count + 1 entries. 
394  *
395  * Returns the size of the activation frame.
396  */
397 int
398 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
399 {
400         int k, frame_size = 0;
401         int size, align, pad;
402         int offset = 8;
403         CallInfo *cinfo;
404
405         cinfo = get_call_info (csig, FALSE);
406
407         if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
408                 frame_size += sizeof (gpointer);
409                 offset += 4;
410         }
411
412         arg_info [0].offset = offset;
413
414         if (csig->hasthis) {
415                 frame_size += sizeof (gpointer);
416                 offset += 4;
417         }
418
419         arg_info [0].size = frame_size;
420
421         for (k = 0; k < param_count; k++) {
422                 
423                 if (csig->pinvoke)
424                         size = mono_type_native_stack_size (csig->params [k], &align);
425                 else
426                         size = mono_type_stack_size (csig->params [k], &align);
427
428                 /* ignore alignment for now */
429                 align = 1;
430
431                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
432                 arg_info [k].pad = pad;
433                 frame_size += size;
434                 arg_info [k + 1].pad = 0;
435                 arg_info [k + 1].size = size;
436                 offset += pad;
437                 arg_info [k + 1].offset = offset;
438                 offset += size;
439         }
440
441         align = MONO_ARCH_FRAME_ALIGNMENT;
442         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
443         arg_info [k].pad = pad;
444
445         g_free (cinfo);
446
447         return frame_size;
448 }
449
450 static const guchar cpuid_impl [] = {
451         0x55,                           /* push   %ebp */
452         0x89, 0xe5,                     /* mov    %esp,%ebp */
453         0x53,                           /* push   %ebx */
454         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
455         0x0f, 0xa2,                     /* cpuid   */
456         0x50,                           /* push   %eax */
457         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
458         0x89, 0x18,                     /* mov    %ebx,(%eax) */
459         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
460         0x89, 0x08,                     /* mov    %ecx,(%eax) */
461         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
462         0x89, 0x10,                     /* mov    %edx,(%eax) */
463         0x58,                           /* pop    %eax */
464         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
465         0x89, 0x02,                     /* mov    %eax,(%edx) */
466         0x5b,                           /* pop    %ebx */
467         0xc9,                           /* leave   */
468         0xc3,                           /* ret     */
469 };
470
471 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
472
473 static int 
474 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
475 {
476         int have_cpuid = 0;
477 #ifndef _MSC_VER
478         __asm__  __volatile__ (
479                 "pushfl\n"
480                 "popl %%eax\n"
481                 "movl %%eax, %%edx\n"
482                 "xorl $0x200000, %%eax\n"
483                 "pushl %%eax\n"
484                 "popfl\n"
485                 "pushfl\n"
486                 "popl %%eax\n"
487                 "xorl %%edx, %%eax\n"
488                 "andl $0x200000, %%eax\n"
489                 "movl %%eax, %0"
490                 : "=r" (have_cpuid)
491                 :
492                 : "%eax", "%edx"
493         );
494 #else
495         __asm {
496                 pushfd
497                 pop eax
498                 mov edx, eax
499                 xor eax, 0x200000
500                 push eax
501                 popfd
502                 pushfd
503                 pop eax
504                 xor eax, edx
505                 and eax, 0x200000
506                 mov have_cpuid, eax
507         }
508 #endif
509         if (have_cpuid) {
510                 /* Have to use the code manager to get around WinXP DEP */
511                 MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
512                 CpuidFunc func;
513                 void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
514                 memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
515
516                 func = (CpuidFunc)ptr;
517                 func (id, p_eax, p_ebx, p_ecx, p_edx);
518
519                 mono_code_manager_destroy (codeman);
520
521                 /*
522                  * We use this approach because of issues with gcc and pic code, see:
523                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
524                 __asm__ __volatile__ ("cpuid"
525                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
526                         : "a" (id));
527                 */
528                 return 1;
529         }
530         return 0;
531 }
532
533 /*
534  * Initialize the cpu to execute managed code.
535  */
536 void
537 mono_arch_cpu_init (void)
538 {
539         /* spec compliance requires running with double precision */
540 #ifndef _MSC_VER
541         guint16 fpcw;
542
543         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
544         fpcw &= ~X86_FPCW_PRECC_MASK;
545         fpcw |= X86_FPCW_PREC_DOUBLE;
546         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
547         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
548 #else
549         _control87 (_PC_53, MCW_PC);
550 #endif
551 }
552
553 /*
554  * This function returns the optimizations supported on this cpu.
555  */
556 guint32
557 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
558 {
559         int eax, ebx, ecx, edx;
560         guint32 opts = 0;
561         
562         *exclude_mask = 0;
563         /* Feature Flags function, flags returned in EDX. */
564         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
565                 if (edx & (1 << 15)) {
566                         opts |= MONO_OPT_CMOV;
567                         if (edx & 1)
568                                 opts |= MONO_OPT_FCMOV;
569                         else
570                                 *exclude_mask |= MONO_OPT_FCMOV;
571                 } else
572                         *exclude_mask |= MONO_OPT_CMOV;
573         }
574         return opts;
575 }
576
577 /*
578  * Determine whenever the trap whose info is in SIGINFO is caused by
579  * integer overflow.
580  */
581 gboolean
582 mono_arch_is_int_overflow (void *sigctx, void *info)
583 {
584         MonoContext ctx;
585         guint8* ip;
586
587         mono_arch_sigctx_to_monoctx (sigctx, &ctx);
588
589         ip = (guint8*)ctx.eip;
590
591         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
592                 gint32 reg;
593
594                 /* idiv REG */
595                 switch (x86_modrm_rm (ip [1])) {
596                 case X86_ECX:
597                         reg = ctx.ecx;
598                         break;
599                 case X86_EBX:
600                         reg = ctx.ebx;
601                         break;
602                 default:
603                         g_assert_not_reached ();
604                         reg = -1;
605                 }
606
607                 if (reg == -1)
608                         return TRUE;
609         }
610                         
611         return FALSE;
612 }
613
614 static gboolean
615 is_regsize_var (MonoType *t) {
616         if (t->byref)
617                 return TRUE;
618         switch (mono_type_get_underlying_type (t)->type) {
619         case MONO_TYPE_I4:
620         case MONO_TYPE_U4:
621         case MONO_TYPE_I:
622         case MONO_TYPE_U:
623         case MONO_TYPE_PTR:
624         case MONO_TYPE_FNPTR:
625                 return TRUE;
626         case MONO_TYPE_OBJECT:
627         case MONO_TYPE_STRING:
628         case MONO_TYPE_CLASS:
629         case MONO_TYPE_SZARRAY:
630         case MONO_TYPE_ARRAY:
631                 return TRUE;
632         case MONO_TYPE_VALUETYPE:
633                 return FALSE;
634         }
635         return FALSE;
636 }
637
638 GList *
639 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
640 {
641         GList *vars = NULL;
642         int i;
643
644         for (i = 0; i < cfg->num_varinfo; i++) {
645                 MonoInst *ins = cfg->varinfo [i];
646                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
647
648                 /* unused vars */
649                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
650                         continue;
651
652                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
653                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
654                         continue;
655
656                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
657                  * 8bit quantities in caller saved registers on x86 */
658                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
659                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
660                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
661                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
662                         g_assert (i == vmv->idx);
663                         vars = g_list_prepend (vars, vmv);
664                 }
665         }
666
667         vars = mono_varlist_sort (cfg, vars, 0);
668
669         return vars;
670 }
671
672 GList *
673 mono_arch_get_global_int_regs (MonoCompile *cfg)
674 {
675         GList *regs = NULL;
676
677         /* we can use 3 registers for global allocation */
678         regs = g_list_prepend (regs, (gpointer)X86_EBX);
679         regs = g_list_prepend (regs, (gpointer)X86_ESI);
680         regs = g_list_prepend (regs, (gpointer)X86_EDI);
681
682         return regs;
683 }
684
685 /*
686  * mono_arch_regalloc_cost:
687  *
688  *  Return the cost, in number of memory references, of the action of 
689  * allocating the variable VMV into a register during global register
690  * allocation.
691  */
692 guint32
693 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
694 {
695         MonoInst *ins = cfg->varinfo [vmv->idx];
696
697         if (cfg->method->save_lmf)
698                 /* The register is already saved */
699                 return (ins->opcode == OP_ARG) ? 1 : 0;
700         else
701                 /* push+pop+possible load if it is an argument */
702                 return (ins->opcode == OP_ARG) ? 3 : 2;
703 }
704  
705 /*
706  * Set var information according to the calling convention. X86 version.
707  * The locals var stuff should most likely be split in another method.
708  */
709 void
710 mono_arch_allocate_vars (MonoCompile *m)
711 {
712         MonoMethodSignature *sig;
713         MonoMethodHeader *header;
714         MonoInst *inst;
715         guint32 locals_stack_size, locals_stack_align;
716         int i, offset, curinst, size, align;
717         gint32 *offsets;
718         CallInfo *cinfo;
719
720         header = mono_method_get_header (m->method);
721         sig = mono_method_signature (m->method);
722
723         offset = 8;
724         curinst = 0;
725
726         cinfo = get_call_info (sig, FALSE);
727
728         switch (cinfo->ret.storage) {
729         case ArgOnStack:
730                 m->ret->opcode = OP_REGOFFSET;
731                 m->ret->inst_basereg = X86_EBP;
732                 m->ret->inst_offset = offset;
733                 offset += sizeof (gpointer);
734                 break;
735         case ArgValuetypeInReg:
736                 break;
737         case ArgInIReg:
738                 m->ret->opcode = OP_REGVAR;
739                 m->ret->inst_c0 = cinfo->ret.reg;
740                 break;
741         case ArgNone:
742         case ArgOnFloatFpStack:
743         case ArgOnDoubleFpStack:
744                 break;
745         default:
746                 g_assert_not_reached ();
747         }
748
749         if (sig->hasthis) {
750                 inst = m->varinfo [curinst];
751                 if (inst->opcode != OP_REGVAR) {
752                         inst->opcode = OP_REGOFFSET;
753                         inst->inst_basereg = X86_EBP;
754                 }
755                 inst->inst_offset = offset;
756                 offset += sizeof (gpointer);
757                 curinst++;
758         }
759
760         if (sig->call_convention == MONO_CALL_VARARG) {
761                 m->sig_cookie = offset;
762                 offset += sizeof (gpointer);
763         }
764
765         for (i = 0; i < sig->param_count; ++i) {
766                 inst = m->varinfo [curinst];
767                 if (inst->opcode != OP_REGVAR) {
768                         inst->opcode = OP_REGOFFSET;
769                         inst->inst_basereg = X86_EBP;
770                 }
771                 inst->inst_offset = offset;
772                 size = mono_type_size (sig->params [i], &align);
773                 size += 4 - 1;
774                 size &= ~(4 - 1);
775                 offset += size;
776                 curinst++;
777         }
778
779         offset = 0;
780
781         /* reserve space to save LMF and caller saved registers */
782
783         if (m->method->save_lmf) {
784                 offset += sizeof (MonoLMF);
785         } else {
786                 if (m->used_int_regs & (1 << X86_EBX)) {
787                         offset += 4;
788                 }
789
790                 if (m->used_int_regs & (1 << X86_EDI)) {
791                         offset += 4;
792                 }
793
794                 if (m->used_int_regs & (1 << X86_ESI)) {
795                         offset += 4;
796                 }
797         }
798
799         switch (cinfo->ret.storage) {
800         case ArgValuetypeInReg:
801                 /* Allocate a local to hold the result, the epilog will copy it to the correct place */
802                 offset += 8;
803                 m->ret->opcode = OP_REGOFFSET;
804                 m->ret->inst_basereg = X86_EBP;
805                 m->ret->inst_offset = - offset;
806                 break;
807         default:
808                 break;
809         }
810
811         /* Allocate locals */
812         offsets = mono_allocate_stack_slots (m, &locals_stack_size, &locals_stack_align);
813         if (locals_stack_align) {
814                 offset += (locals_stack_align - 1);
815                 offset &= ~(locals_stack_align - 1);
816         }
817         for (i = m->locals_start; i < m->num_varinfo; i++) {
818                 if (offsets [i] != -1) {
819                         MonoInst *inst = m->varinfo [i];
820                         inst->opcode = OP_REGOFFSET;
821                         inst->inst_basereg = X86_EBP;
822                         inst->inst_offset = - (offset + offsets [i]);
823                         //printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
824                 }
825         }
826         g_free (offsets);
827         offset += locals_stack_size;
828
829         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
830         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
831
832         g_free (cinfo);
833
834         /* change sign? */
835         m->stack_offset = -offset;
836 }
837
838 void
839 mono_arch_create_vars (MonoCompile *cfg)
840 {
841         MonoMethodSignature *sig;
842         CallInfo *cinfo;
843
844         sig = mono_method_signature (cfg->method);
845
846         cinfo = get_call_info (sig, FALSE);
847
848         if (cinfo->ret.storage == ArgValuetypeInReg)
849                 cfg->ret_var_is_local = TRUE;
850
851         g_free (cinfo);
852 }
853
854 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
855  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
856  */
857
858 /* 
859  * take the arguments and generate the arch-specific
860  * instructions to properly call the function in call.
861  * This includes pushing, moving arguments to the right register
862  * etc.
863  * Issue: who does the spilling if needed, and when?
864  */
865 MonoCallInst*
866 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
867         MonoInst *arg, *in;
868         MonoMethodSignature *sig;
869         int i, n, stack_size, type;
870         MonoType *ptype;
871         CallInfo *cinfo;
872
873         stack_size = 0;
874         /* add the vararg cookie before the non-implicit args */
875         if (call->signature->call_convention == MONO_CALL_VARARG) {
876                 MonoInst *sig_arg;
877                 /* FIXME: Add support for signature tokens to AOT */
878                 cfg->disable_aot = TRUE;
879                 MONO_INST_NEW (cfg, arg, OP_OUTARG);
880                 MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
881                 sig_arg->inst_p0 = call->signature;
882                 arg->inst_left = sig_arg;
883                 arg->type = STACK_PTR;
884                 /* prepend, so they get reversed */
885                 arg->next = call->out_args;
886                 call->out_args = arg;
887                 stack_size += sizeof (gpointer);
888         }
889         sig = call->signature;
890         n = sig->param_count + sig->hasthis;
891
892         cinfo = get_call_info (sig, FALSE);
893
894         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
895                 if (cinfo->ret.storage == ArgOnStack)
896                         stack_size += sizeof (gpointer);
897         }
898
899         for (i = 0; i < n; ++i) {
900                 if (is_virtual && i == 0) {
901                         /* the argument will be attached to the call instrucion */
902                         in = call->args [i];
903                         stack_size += 4;
904                 } else {
905                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
906                         in = call->args [i];
907                         arg->cil_code = in->cil_code;
908                         arg->inst_left = in;
909                         arg->type = in->type;
910                         /* prepend, so they get reversed */
911                         arg->next = call->out_args;
912                         call->out_args = arg;
913                         if (i >= sig->hasthis) {
914                                 MonoType *t = sig->params [i - sig->hasthis];
915                                 ptype = mono_type_get_underlying_type (t);
916                                 if (t->byref)
917                                         type = MONO_TYPE_U;
918                                 else
919                                         type = ptype->type;
920                                 /* FIXME: validate arguments... */
921                                 switch (type) {
922                                 case MONO_TYPE_I:
923                                 case MONO_TYPE_U:
924                                 case MONO_TYPE_BOOLEAN:
925                                 case MONO_TYPE_CHAR:
926                                 case MONO_TYPE_I1:
927                                 case MONO_TYPE_U1:
928                                 case MONO_TYPE_I2:
929                                 case MONO_TYPE_U2:
930                                 case MONO_TYPE_I4:
931                                 case MONO_TYPE_U4:
932                                 case MONO_TYPE_STRING:
933                                 case MONO_TYPE_CLASS:
934                                 case MONO_TYPE_OBJECT:
935                                 case MONO_TYPE_PTR:
936                                 case MONO_TYPE_FNPTR:
937                                 case MONO_TYPE_ARRAY:
938                                 case MONO_TYPE_SZARRAY:
939                                         stack_size += 4;
940                                         break;
941                                 case MONO_TYPE_I8:
942                                 case MONO_TYPE_U8:
943                                         stack_size += 8;
944                                         break;
945                                 case MONO_TYPE_R4:
946                                         stack_size += 4;
947                                         arg->opcode = OP_OUTARG_R4;
948                                         break;
949                                 case MONO_TYPE_R8:
950                                         stack_size += 8;
951                                         arg->opcode = OP_OUTARG_R8;
952                                         break;
953                                 case MONO_TYPE_VALUETYPE: {
954                                         int size;
955                                         if (sig->pinvoke) 
956                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
957                                         else 
958                                                 size = mono_type_stack_size (&in->klass->byval_arg, NULL);
959
960                                         stack_size += size;
961                                         arg->opcode = OP_OUTARG_VT;
962                                         arg->klass = in->klass;
963                                         arg->unused = sig->pinvoke;
964                                         arg->inst_imm = size; 
965                                         break;
966                                 }
967                                 case MONO_TYPE_TYPEDBYREF:
968                                         stack_size += sizeof (MonoTypedRef);
969                                         arg->opcode = OP_OUTARG_VT;
970                                         arg->klass = in->klass;
971                                         arg->unused = sig->pinvoke;
972                                         arg->inst_imm = sizeof (MonoTypedRef); 
973                                         break;
974                                 default:
975                                         g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
976                                 }
977                         } else {
978                                 /* the this argument */
979                                 stack_size += 4;
980                         }
981                 }
982         }
983
984         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
985                 if (cinfo->ret.storage == ArgValuetypeInReg) {
986                         MonoInst *zero_inst;
987                         /*
988                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
989                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
990                          * before calling the function. So we add a dummy instruction to represent pushing the 
991                          * struct return address to the stack. The return address will be saved to this stack slot 
992                          * by the code emitted in this_vret_args.
993                          */
994                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
995                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
996                         zero_inst->inst_p0 = 0;
997                         arg->inst_left = zero_inst;
998                         arg->type = STACK_PTR;
999                         /* prepend, so they get reversed */
1000                         arg->next = call->out_args;
1001                         call->out_args = arg;
1002                 }
1003                 else
1004                         /* if the function returns a struct, the called method already does a ret $0x4 */
1005                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1006                                 stack_size -= 4;
1007         }
1008
1009         call->stack_usage = stack_size;
1010         g_free (cinfo);
1011
1012         /* 
1013          * should set more info in call, such as the stack space
1014          * used by the args that needs to be added back to esp
1015          */
1016
1017         return call;
1018 }
1019
1020 /*
1021  * Allow tracing to work with this interface (with an optional argument)
1022  */
1023 void*
1024 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1025 {
1026         guchar *code = p;
1027
1028         /* if some args are passed in registers, we need to save them here */
1029         x86_push_reg (code, X86_EBP);
1030
1031         if (cfg->compile_aot) {
1032                 x86_push_imm (code, cfg->method);
1033                 x86_mov_reg_imm (code, X86_EAX, func);
1034                 x86_call_reg (code, X86_EAX);
1035         } else {
1036                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1037                 x86_push_imm (code, cfg->method);
1038                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1039                 x86_call_code (code, 0);
1040         }
1041         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1042
1043         return code;
1044 }
1045
1046 enum {
1047         SAVE_NONE,
1048         SAVE_STRUCT,
1049         SAVE_EAX,
1050         SAVE_EAX_EDX,
1051         SAVE_FP
1052 };
1053
1054 void*
1055 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1056 {
1057         guchar *code = p;
1058         int arg_size = 0, save_mode = SAVE_NONE;
1059         MonoMethod *method = cfg->method;
1060         
1061         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1062         case MONO_TYPE_VOID:
1063                 /* special case string .ctor icall */
1064                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1065                         save_mode = SAVE_EAX;
1066                 else
1067                         save_mode = SAVE_NONE;
1068                 break;
1069         case MONO_TYPE_I8:
1070         case MONO_TYPE_U8:
1071                 save_mode = SAVE_EAX_EDX;
1072                 break;
1073         case MONO_TYPE_R4:
1074         case MONO_TYPE_R8:
1075                 save_mode = SAVE_FP;
1076                 break;
1077         case MONO_TYPE_VALUETYPE:
1078                 save_mode = SAVE_STRUCT;
1079                 break;
1080         default:
1081                 save_mode = SAVE_EAX;
1082                 break;
1083         }
1084
1085         switch (save_mode) {
1086         case SAVE_EAX_EDX:
1087                 x86_push_reg (code, X86_EDX);
1088                 x86_push_reg (code, X86_EAX);
1089                 if (enable_arguments) {
1090                         x86_push_reg (code, X86_EDX);
1091                         x86_push_reg (code, X86_EAX);
1092                         arg_size = 8;
1093                 }
1094                 break;
1095         case SAVE_EAX:
1096                 x86_push_reg (code, X86_EAX);
1097                 if (enable_arguments) {
1098                         x86_push_reg (code, X86_EAX);
1099                         arg_size = 4;
1100                 }
1101                 break;
1102         case SAVE_FP:
1103                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1104                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1105                 if (enable_arguments) {
1106                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1107                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1108                         arg_size = 8;
1109                 }
1110                 break;
1111         case SAVE_STRUCT:
1112                 if (enable_arguments) {
1113                         x86_push_membase (code, X86_EBP, 8);
1114                         arg_size = 4;
1115                 }
1116                 break;
1117         case SAVE_NONE:
1118         default:
1119                 break;
1120         }
1121
1122         if (cfg->compile_aot) {
1123                 x86_push_imm (code, method);
1124                 x86_mov_reg_imm (code, X86_EAX, func);
1125                 x86_call_reg (code, X86_EAX);
1126         } else {
1127                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1128                 x86_push_imm (code, method);
1129                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1130                 x86_call_code (code, 0);
1131         }
1132         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1133
1134         switch (save_mode) {
1135         case SAVE_EAX_EDX:
1136                 x86_pop_reg (code, X86_EAX);
1137                 x86_pop_reg (code, X86_EDX);
1138                 break;
1139         case SAVE_EAX:
1140                 x86_pop_reg (code, X86_EAX);
1141                 break;
1142         case SAVE_FP:
1143                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1144                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1145                 break;
1146         case SAVE_NONE:
1147         default:
1148                 break;
1149         }
1150
1151         return code;
1152 }
1153
1154 #define EMIT_COND_BRANCH(ins,cond,sign) \
1155 if (ins->flags & MONO_INST_BRLABEL) { \
1156         if (ins->inst_i0->inst_c0) { \
1157                 x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
1158         } else { \
1159                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
1160                 if ((cfg->opt & MONO_OPT_BRANCH) && \
1161                     x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
1162                         x86_branch8 (code, cond, 0, sign); \
1163                 else \
1164                         x86_branch32 (code, cond, 0, sign); \
1165         } \
1166 } else { \
1167         if (ins->inst_true_bb->native_offset) { \
1168                 x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
1169         } else { \
1170                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
1171                 if ((cfg->opt & MONO_OPT_BRANCH) && \
1172                     x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
1173                         x86_branch8 (code, cond, 0, sign); \
1174                 else \
1175                         x86_branch32 (code, cond, 0, sign); \
1176         } \
1177 }
1178
1179 /* emit an exception if condition is fail */
1180 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
1181         do {                                                        \
1182                 mono_add_patch_info (cfg, code - cfg->native_code,   \
1183                                     MONO_PATCH_INFO_EXC, exc_name);  \
1184                 x86_branch32 (code, cond, 0, signed);               \
1185         } while (0); 
1186
1187 #define EMIT_FPCOMPARE(code) do { \
1188         x86_fcompp (code); \
1189         x86_fnstsw (code); \
1190 } while (0); 
1191
1192
1193 static guint8*
1194 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1195 {
1196         if (cfg->compile_aot) {
1197                 guint32 got_reg = X86_EAX;
1198
1199                 if (cfg->compile_aot) {          
1200                         /*
1201                          * Since the patches are generated by the back end, there is
1202                          * no way to generate a got_var at this point.
1203                          */
1204                         g_assert (cfg->got_var);
1205
1206                         if (cfg->got_var->opcode == OP_REGOFFSET)
1207                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1208                         else
1209                                 got_reg = cfg->got_var->dreg;
1210                 }
1211
1212                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1213                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1214         }
1215         else {
1216                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1217                 x86_call_code (code, 0);
1218         }
1219
1220         return code;
1221 }
1222
1223 /* FIXME: Add more instructions */
1224 #define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1225
1226 static void
1227 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1228 {
1229         MonoInst *ins, *last_ins = NULL;
1230         ins = bb->code;
1231
1232         while (ins) {
1233
1234                 switch (ins->opcode) {
1235                 case OP_ICONST:
1236                         /* reg = 0 -> XOR (reg, reg) */
1237                         /* XOR sets cflags on x86, so we cant do it always */
1238                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1239                                 ins->opcode = CEE_XOR;
1240                                 ins->sreg1 = ins->dreg;
1241                                 ins->sreg2 = ins->dreg;
1242                         }
1243                         break;
1244                 case OP_MUL_IMM: 
1245                         /* remove unnecessary multiplication with 1 */
1246                         if (ins->inst_imm == 1) {
1247                                 if (ins->dreg != ins->sreg1) {
1248                                         ins->opcode = OP_MOVE;
1249                                 } else {
1250                                         last_ins->next = ins->next;
1251                                         ins = ins->next;
1252                                         continue;
1253                                 }
1254                         }
1255                         break;
1256                 case OP_COMPARE_IMM:
1257                         /* OP_COMPARE_IMM (reg, 0) 
1258                          * --> 
1259                          * OP_X86_TEST_NULL (reg) 
1260                          */
1261                         if (!ins->inst_imm)
1262                                 ins->opcode = OP_X86_TEST_NULL;
1263                         break;
1264                 case OP_X86_COMPARE_MEMBASE_IMM:
1265                         /* 
1266                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1267                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1268                          * -->
1269                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1270                          * OP_COMPARE_IMM reg, imm
1271                          *
1272                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1273                          */
1274                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1275                             ins->inst_basereg == last_ins->inst_destbasereg &&
1276                             ins->inst_offset == last_ins->inst_offset) {
1277                                         ins->opcode = OP_COMPARE_IMM;
1278                                         ins->sreg1 = last_ins->sreg1;
1279
1280                                         /* check if we can remove cmp reg,0 with test null */
1281                                         if (!ins->inst_imm)
1282                                                 ins->opcode = OP_X86_TEST_NULL;
1283                                 }
1284
1285                         break;
1286                 case OP_LOAD_MEMBASE:
1287                 case OP_LOADI4_MEMBASE:
1288                         /* 
1289                          * Note: if reg1 = reg2 the load op is removed
1290                          *
1291                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1292                          * OP_LOAD_MEMBASE offset(basereg), reg2
1293                          * -->
1294                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1295                          * OP_MOVE reg1, reg2
1296                          */
1297                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1298                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1299                             ins->inst_basereg == last_ins->inst_destbasereg &&
1300                             ins->inst_offset == last_ins->inst_offset) {
1301                                 if (ins->dreg == last_ins->sreg1) {
1302                                         last_ins->next = ins->next;                             
1303                                         ins = ins->next;                                
1304                                         continue;
1305                                 } else {
1306                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1307                                         ins->opcode = OP_MOVE;
1308                                         ins->sreg1 = last_ins->sreg1;
1309                                 }
1310
1311                         /* 
1312                          * Note: reg1 must be different from the basereg in the second load
1313                          * Note: if reg1 = reg2 is equal then second load is removed
1314                          *
1315                          * OP_LOAD_MEMBASE offset(basereg), reg1
1316                          * OP_LOAD_MEMBASE offset(basereg), reg2
1317                          * -->
1318                          * OP_LOAD_MEMBASE offset(basereg), reg1
1319                          * OP_MOVE reg1, reg2
1320                          */
1321                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1322                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1323                               ins->inst_basereg != last_ins->dreg &&
1324                               ins->inst_basereg == last_ins->inst_basereg &&
1325                               ins->inst_offset == last_ins->inst_offset) {
1326
1327                                 if (ins->dreg == last_ins->dreg) {
1328                                         last_ins->next = ins->next;                             
1329                                         ins = ins->next;                                
1330                                         continue;
1331                                 } else {
1332                                         ins->opcode = OP_MOVE;
1333                                         ins->sreg1 = last_ins->dreg;
1334                                 }
1335
1336                                 //g_assert_not_reached ();
1337
1338 #if 0
1339                         /* 
1340                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1341                          * OP_LOAD_MEMBASE offset(basereg), reg
1342                          * -->
1343                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1344                          * OP_ICONST reg, imm
1345                          */
1346                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1347                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1348                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1349                                    ins->inst_offset == last_ins->inst_offset) {
1350                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1351                                 ins->opcode = OP_ICONST;
1352                                 ins->inst_c0 = last_ins->inst_imm;
1353                                 g_assert_not_reached (); // check this rule
1354 #endif
1355                         }
1356                         break;
1357                 case OP_LOADU1_MEMBASE:
1358                 case OP_LOADI1_MEMBASE:
1359                         /* 
1360                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1361                          * OP_LOAD_MEMBASE offset(basereg), reg2
1362                          * -->
1363                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1364                          * CONV_I2/U2 reg1, reg2
1365                          */
1366                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1367                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1368                                         ins->inst_offset == last_ins->inst_offset) {
1369                                 ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
1370                                 ins->sreg1 = last_ins->sreg1;
1371                         }
1372                         break;
1373                 case OP_LOADU2_MEMBASE:
1374                 case OP_LOADI2_MEMBASE:
1375                         /* 
1376                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1377                          * OP_LOAD_MEMBASE offset(basereg), reg2
1378                          * -->
1379                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1380                          * CONV_I2/U2 reg1, reg2
1381                          */
1382                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1383                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1384                                         ins->inst_offset == last_ins->inst_offset) {
1385                                 ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
1386                                 ins->sreg1 = last_ins->sreg1;
1387                         }
1388                         break;
1389                 case CEE_CONV_I4:
1390                 case CEE_CONV_U4:
1391                 case OP_MOVE:
1392                         /*
1393                          * Removes:
1394                          *
1395                          * OP_MOVE reg, reg 
1396                          */
1397                         if (ins->dreg == ins->sreg1) {
1398                                 if (last_ins)
1399                                         last_ins->next = ins->next;                             
1400                                 ins = ins->next;
1401                                 continue;
1402                         }
1403                         /* 
1404                          * Removes:
1405                          *
1406                          * OP_MOVE sreg, dreg 
1407                          * OP_MOVE dreg, sreg
1408                          */
1409                         if (last_ins && last_ins->opcode == OP_MOVE &&
1410                             ins->sreg1 == last_ins->dreg &&
1411                             ins->dreg == last_ins->sreg1) {
1412                                 last_ins->next = ins->next;                             
1413                                 ins = ins->next;                                
1414                                 continue;
1415                         }
1416                         break;
1417                         
1418                 case OP_X86_PUSH_MEMBASE:
1419                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1420                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1421                             ins->inst_basereg == last_ins->inst_destbasereg &&
1422                             ins->inst_offset == last_ins->inst_offset) {
1423                                     ins->opcode = OP_X86_PUSH;
1424                                     ins->sreg1 = last_ins->sreg1;
1425                         }
1426                         break;
1427                 }
1428                 last_ins = ins;
1429                 ins = ins->next;
1430         }
1431         bb->last_ins = last_ins;
1432 }
1433
1434 static const int 
1435 branch_cc_table [] = {
1436         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1437         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1438         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1439 };
1440
1441 #define DEBUG(a) if (cfg->verbose_level > 1) a
1442 //#define DEBUG(a)
1443
1444 /*
1445  * returns the offset used by spillvar. It allocates a new
1446  * spill variable if necessary. 
1447  */
1448 static int
1449 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1450 {
1451         MonoSpillInfo **si, *info;
1452         int i = 0;
1453
1454         si = &cfg->spill_info; 
1455         
1456         while (i <= spillvar) {
1457
1458                 if (!*si) {
1459                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1460                         info->next = NULL;
1461                         cfg->stack_offset -= sizeof (gpointer);
1462                         info->offset = cfg->stack_offset;
1463                 }
1464
1465                 if (i == spillvar)
1466                         return (*si)->offset;
1467
1468                 i++;
1469                 si = &(*si)->next;
1470         }
1471
1472         g_assert_not_reached ();
1473         return 0;
1474 }
1475
1476 /*
1477  * returns the offset used by spillvar. It allocates a new
1478  * spill float variable if necessary. 
1479  * (same as mono_spillvar_offset but for float)
1480  */
1481 static int
1482 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1483 {
1484         MonoSpillInfo **si, *info;
1485         int i = 0;
1486
1487         si = &cfg->spill_info_float; 
1488         
1489         while (i <= spillvar) {
1490
1491                 if (!*si) {
1492                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1493                         info->next = NULL;
1494                         cfg->stack_offset -= sizeof (double);
1495                         info->offset = cfg->stack_offset;
1496                 }
1497
1498                 if (i == spillvar)
1499                         return (*si)->offset;
1500
1501                 i++;
1502                 si = &(*si)->next;
1503         }
1504
1505         g_assert_not_reached ();
1506         return 0;
1507 }
1508
1509 /*
1510  * Creates a store for spilled floating point items
1511  */
1512 static MonoInst*
1513 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1514 {
1515         MonoInst *store;
1516         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1517         store->sreg1 = reg;
1518         store->inst_destbasereg = X86_EBP;
1519         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1520
1521         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08x(%%sp)) (from %d)\n", spill, store->inst_offset, reg));
1522         return store;
1523 }
1524
1525 /*
1526  * Creates a load for spilled floating point items 
1527  */
1528 static MonoInst*
1529 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1530 {
1531         MonoInst *load;
1532         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1533         load->dreg = reg;
1534         load->inst_basereg = X86_EBP;
1535         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1536
1537         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08x(%%sp)) (from %d)\n", spill, load->inst_offset, reg));
1538         return load;
1539 }
1540
1541 #define is_global_ireg(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && !X86_IS_CALLEE ((r)))
1542 #define reg_is_freeable(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && X86_IS_CALLEE ((r)))
1543
1544 typedef struct {
1545         int born_in;
1546         int killed_in;
1547         int last_use;
1548         int prev_use;
1549         int flags;              /* used to track fp spill/load */
1550 } RegTrack;
1551
1552 static const char*const * ins_spec = pentium_desc;
1553
1554 static void
1555 print_ins (int i, MonoInst *ins)
1556 {
1557         const char *spec = ins_spec [ins->opcode];
1558         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1559         if (spec [MONO_INST_DEST]) {
1560                 if (ins->dreg >= MONO_MAX_IREGS)
1561                         g_print (" R%d <-", ins->dreg);
1562                 else
1563                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1564         }
1565         if (spec [MONO_INST_SRC1]) {
1566                 if (ins->sreg1 >= MONO_MAX_IREGS)
1567                         g_print (" R%d", ins->sreg1);
1568                 else
1569                         g_print (" %s", mono_arch_regname (ins->sreg1));
1570         }
1571         if (spec [MONO_INST_SRC2]) {
1572                 if (ins->sreg2 >= MONO_MAX_IREGS)
1573                         g_print (" R%d", ins->sreg2);
1574                 else
1575                         g_print (" %s", mono_arch_regname (ins->sreg2));
1576         }
1577         if (spec [MONO_INST_CLOB])
1578                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1579         g_print ("\n");
1580 }
1581
1582 static void
1583 print_regtrack (RegTrack *t, int num)
1584 {
1585         int i;
1586         char buf [32];
1587         const char *r;
1588         
1589         for (i = 0; i < num; ++i) {
1590                 if (!t [i].born_in)
1591                         continue;
1592                 if (i >= MONO_MAX_IREGS) {
1593                         g_snprintf (buf, sizeof(buf), "R%d", i);
1594                         r = buf;
1595                 } else
1596                         r = mono_arch_regname (i);
1597                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1598         }
1599 }
1600
1601 typedef struct InstList InstList;
1602
1603 struct InstList {
1604         InstList *prev;
1605         InstList *next;
1606         MonoInst *data;
1607 };
1608
1609 static inline InstList*
1610 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1611 {
1612         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1613         item->data = data;
1614         item->prev = NULL;
1615         item->next = list;
1616         if (list)
1617                 list->prev = item;
1618         return item;
1619 }
1620
1621 /*
1622  * Force the spilling of the variable in the symbolic register 'reg'.
1623  */
1624 static int
1625 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1626 {
1627         MonoInst *load;
1628         int i, sel, spill;
1629         
1630         sel = cfg->rs->iassign [reg];
1631         /*i = cfg->rs->isymbolic [sel];
1632         g_assert (i == reg);*/
1633         i = reg;
1634         spill = ++cfg->spill_count;
1635         cfg->rs->iassign [i] = -spill - 1;
1636         mono_regstate_free_int (cfg->rs, sel);
1637         /* we need to create a spill var and insert a load to sel after the current instruction */
1638         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1639         load->dreg = sel;
1640         load->inst_basereg = X86_EBP;
1641         load->inst_offset = mono_spillvar_offset (cfg, spill);
1642         if (item->prev) {
1643                 while (ins->next != item->prev->data)
1644                         ins = ins->next;
1645         }
1646         load->next = ins->next;
1647         ins->next = load;
1648         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1649         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1650         g_assert (i == sel);
1651
1652         return sel;
1653 }
1654
1655 static int
1656 get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
1657 {
1658         MonoInst *load;
1659         int i, sel, spill;
1660
1661         DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
1662         /* exclude the registers in the current instruction */
1663         if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
1664                 if (ins->sreg1 >= MONO_MAX_IREGS)
1665                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
1666                 else
1667                         regmask &= ~ (1 << ins->sreg1);
1668                 DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
1669         }
1670         if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
1671                 if (ins->sreg2 >= MONO_MAX_IREGS)
1672                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
1673                 else
1674                         regmask &= ~ (1 << ins->sreg2);
1675                 DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
1676         }
1677         if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
1678                 regmask &= ~ (1 << ins->dreg);
1679                 DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
1680         }
1681
1682         DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
1683         g_assert (regmask); /* need at least a register we can free */
1684         sel = -1;
1685         /* we should track prev_use and spill the register that's farther */
1686         for (i = 0; i < MONO_MAX_IREGS; ++i) {
1687                 if (regmask & (1 << i)) {
1688                         sel = i;
1689                         DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
1690                         break;
1691                 }
1692         }
1693         i = cfg->rs->isymbolic [sel];
1694         spill = ++cfg->spill_count;
1695         cfg->rs->iassign [i] = -spill - 1;
1696         mono_regstate_free_int (cfg->rs, sel);
1697         /* we need to create a spill var and insert a load to sel after the current instruction */
1698         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1699         load->dreg = sel;
1700         load->inst_basereg = X86_EBP;
1701         load->inst_offset = mono_spillvar_offset (cfg, spill);
1702         if (item->prev) {
1703                 while (ins->next != item->prev->data)
1704                         ins = ins->next;
1705         }
1706         load->next = ins->next;
1707         ins->next = load;
1708         DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1709         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1710         g_assert (i == sel);
1711         
1712         return sel;
1713 }
1714
1715 static MonoInst*
1716 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1717 {
1718         MonoInst *copy;
1719         MONO_INST_NEW (cfg, copy, OP_MOVE);
1720         copy->dreg = dest;
1721         copy->sreg1 = src;
1722         if (ins) {
1723                 copy->next = ins->next;
1724                 ins->next = copy;
1725         }
1726         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1727         return copy;
1728 }
1729
1730 static MonoInst*
1731 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1732 {
1733         MonoInst *store;
1734         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1735         store->sreg1 = reg;
1736         store->inst_destbasereg = X86_EBP;
1737         store->inst_offset = mono_spillvar_offset (cfg, spill);
1738         if (ins) {
1739                 store->next = ins->next;
1740                 ins->next = store;
1741         }
1742         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1743         return store;
1744 }
1745
1746 static void
1747 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1748 {
1749         MonoInst *prev;
1750         if (item->next) {
1751                 prev = item->next->data;
1752
1753                 while (prev->next != ins)
1754                         prev = prev->next;
1755                 to_insert->next = ins;
1756                 prev->next = to_insert;
1757         } else {
1758                 to_insert->next = ins;
1759         }
1760         /* 
1761          * needed otherwise in the next instruction we can add an ins to the 
1762          * end and that would get past this instruction.
1763          */
1764         item->data = to_insert; 
1765 }
1766
1767
1768 #if  0
1769 static int
1770 alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
1771 {
1772         int val = cfg->rs->iassign [sym_reg];
1773         if (val < 0) {
1774                 int spill = 0;
1775                 if (val < -1) {
1776                         /* the register gets spilled after this inst */
1777                         spill = -val -1;
1778                 }
1779                 val = mono_regstate_alloc_int (cfg->rs, allow_mask);
1780                 if (val < 0)
1781                         val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
1782                 cfg->rs->iassign [sym_reg] = val;
1783                 /* add option to store before the instruction for src registers */
1784                 if (spill)
1785                         create_spilled_store (cfg, spill, val, sym_reg, ins);
1786         }
1787         cfg->rs->isymbolic [val] = sym_reg;
1788         return val;
1789 }
1790 #endif
1791
1792 /* flags used in reginfo->flags */
1793 enum {
1794         MONO_X86_FP_NEEDS_LOAD_SPILL    = 1 << 0,
1795         MONO_X86_FP_NEEDS_SPILL                 = 1 << 1,
1796         MONO_X86_FP_NEEDS_LOAD                  = 1 << 2,
1797         MONO_X86_REG_NOT_ECX                    = 1 << 3,
1798         MONO_X86_REG_EAX                                = 1 << 4,
1799         MONO_X86_REG_EDX                                = 1 << 5,
1800         MONO_X86_REG_ECX                                = 1 << 6
1801 };
1802
1803 static int
1804 mono_x86_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1805 {
1806         int val;
1807         int test_mask = dest_mask;
1808
1809         if (flags & MONO_X86_REG_EAX)
1810                 test_mask &= (1 << X86_EAX);
1811         else if (flags & MONO_X86_REG_EDX)
1812                 test_mask &= (1 << X86_EDX);
1813         else if (flags & MONO_X86_REG_ECX)
1814                 test_mask &= (1 << X86_ECX);
1815         else if (flags & MONO_X86_REG_NOT_ECX)
1816                 test_mask &= ~ (1 << X86_ECX);
1817
1818         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1819         if (val >= 0 && test_mask != dest_mask)
1820                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1821
1822         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1823                 DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but exluding ECX\n", test_mask));
1824                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & (~1 << X86_ECX)));
1825         }
1826
1827         if (val < 0) {
1828                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1829                 if (val < 0)
1830                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1831         }
1832
1833         return val;
1834 }
1835
1836 static inline void
1837 assign_ireg (MonoRegState *rs, int reg, int hreg)
1838 {
1839         g_assert (reg >= MONO_MAX_IREGS);
1840         g_assert (hreg < MONO_MAX_IREGS);
1841         g_assert (! is_global_ireg (hreg));
1842
1843         rs->iassign [reg] = hreg;
1844         rs->isymbolic [hreg] = reg;
1845         rs->ifree_mask &= ~ (1 << hreg);
1846 }
1847
1848 /*#include "cprop.c"*/
1849
1850 /*
1851  * Local register allocation.
1852  * We first scan the list of instructions and we save the liveness info of
1853  * each register (when the register is first used, when it's value is set etc.).
1854  * We also reverse the list of instructions (in the InstList list) because assigning
1855  * registers backwards allows for more tricks to be used.
1856  */
1857 void
1858 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1859 {
1860         MonoInst *ins;
1861         MonoRegState *rs = cfg->rs;
1862         int i, val, fpcount;
1863         RegTrack *reginfo, *reginfof;
1864         RegTrack *reginfo1, *reginfo2, *reginfod;
1865         InstList *tmp, *reversed = NULL;
1866         const char *spec;
1867         guint32 src1_mask, src2_mask, dest_mask;
1868         GList *fspill_list = NULL;
1869         int fspill = 0;
1870
1871         if (!bb->code)
1872                 return;
1873         rs->next_vireg = bb->max_ireg;
1874         rs->next_vfreg = bb->max_freg;
1875         mono_regstate_assign (rs);
1876         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1877         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1878         rs->ifree_mask = X86_CALLEE_REGS;
1879
1880         ins = bb->code;
1881
1882         /*if (cfg->opt & MONO_OPT_COPYPROP)
1883                 local_copy_prop (cfg, ins);*/
1884
1885         i = 1;
1886         fpcount = 0;
1887         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1888         /* forward pass on the instructions to collect register liveness info */
1889         while (ins) {
1890                 spec = ins_spec [ins->opcode];
1891                 
1892                 DEBUG (print_ins (i, ins));
1893
1894                 if (spec [MONO_INST_SRC1]) {
1895                         if (spec [MONO_INST_SRC1] == 'f') {
1896                                 GList *spill;
1897                                 reginfo1 = reginfof;
1898
1899                                 spill = g_list_first (fspill_list);
1900                                 if (spill && fpcount < MONO_MAX_FREGS) {
1901                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1902                                         fspill_list = g_list_remove (fspill_list, spill->data);
1903                                 } else
1904                                         fpcount--;
1905                         }
1906                         else
1907                                 reginfo1 = reginfo;
1908                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1909                         reginfo1 [ins->sreg1].last_use = i;
1910                         if (spec [MONO_INST_SRC1] == 'L') {
1911                                 /* The virtual register is allocated sequentially */
1912                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1913                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1914                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1915                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1916
1917                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1918                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1919                         }
1920                 } else {
1921                         ins->sreg1 = -1;
1922                 }
1923                 if (spec [MONO_INST_SRC2]) {
1924                         if (spec [MONO_INST_SRC2] == 'f') {
1925                                 GList *spill;
1926                                 reginfo2 = reginfof;
1927                                 spill = g_list_first (fspill_list);
1928                                 if (spill) {
1929                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1930                                         fspill_list = g_list_remove (fspill_list, spill->data);
1931                                         if (fpcount >= MONO_MAX_FREGS) {
1932                                                 fspill++;
1933                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1934                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1935                                         }
1936                                 } else
1937                                         fpcount--;
1938                         }
1939                         else
1940                                 reginfo2 = reginfo;
1941                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1942                         reginfo2 [ins->sreg2].last_use = i;
1943                         if (spec [MONO_INST_SRC2] == 'L') {
1944                                 /* The virtual register is allocated sequentially */
1945                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1946                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1947                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1948                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1949                         }
1950                         if (spec [MONO_INST_CLOB] == 's') {
1951                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1952                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1953                         }
1954                 } else {
1955                         ins->sreg2 = -1;
1956                 }
1957                 if (spec [MONO_INST_DEST]) {
1958                         if (spec [MONO_INST_DEST] == 'f') {
1959                                 reginfod = reginfof;
1960                                 if (fpcount >= MONO_MAX_FREGS) {
1961                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1962                                         fspill++;
1963                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1964                                         fpcount--;
1965                                 }
1966                                 fpcount++;
1967                         }
1968                         else
1969                                 reginfod = reginfo;
1970                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1971                                 reginfod [ins->dreg].killed_in = i;
1972                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1973                         reginfod [ins->dreg].last_use = i;
1974                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1975                                 reginfod [ins->dreg].born_in = i;
1976                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
1977                                 /* The virtual register is allocated sequentially */
1978                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1979                                 reginfod [ins->dreg + 1].last_use = i;
1980                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1981                                         reginfod [ins->dreg + 1].born_in = i;
1982
1983                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
1984                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
1985                         }
1986                 } else {
1987                         ins->dreg = -1;
1988                 }
1989
1990                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
1991                 ++i;
1992                 ins = ins->next;
1993         }
1994
1995         // todo: check if we have anything left on fp stack, in verify mode?
1996         fspill = 0;
1997
1998         DEBUG (print_regtrack (reginfo, rs->next_vireg));
1999         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
2000         tmp = reversed;
2001         while (tmp) {
2002                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
2003                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
2004                 --i;
2005                 ins = tmp->data;
2006                 spec = ins_spec [ins->opcode];
2007                 prev_dreg = -1;
2008                 clob_dreg = -1;
2009                 DEBUG (g_print ("processing:"));
2010                 DEBUG (print_ins (i, ins));
2011                 if (spec [MONO_INST_CLOB] == 's') {
2012                         /*
2013                          * Shift opcodes, SREG2 must be RCX
2014                          */
2015                         if (rs->ifree_mask & (1 << X86_ECX)) {
2016                                 if (ins->sreg2 < MONO_MAX_IREGS) {
2017                                         /* Argument already in hard reg, need to copy */
2018                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2019                                         insert_before_ins (ins, tmp, copy);
2020                                 }
2021                                 else {
2022                                         DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
2023                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2024                                 }
2025                         } else {
2026                                 int need_ecx_spill = TRUE;
2027                                 /* 
2028                                  * we first check if src1/dreg is already assigned a register
2029                                  * and then we force a spill of the var assigned to ECX.
2030                                  */
2031                                 /* the destination register can't be ECX */
2032                                 dest_mask &= ~ (1 << X86_ECX);
2033                                 src1_mask &= ~ (1 << X86_ECX);
2034                                 val = rs->iassign [ins->dreg];
2035                                 /* 
2036                                  * the destination register is already assigned to ECX:
2037                                  * we need to allocate another register for it and then
2038                                  * copy from this to ECX.
2039                                  */
2040                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
2041                                         int new_dest;
2042                                         new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2043                                         g_assert (new_dest >= 0);
2044                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
2045
2046                                         rs->isymbolic [new_dest] = ins->dreg;
2047                                         rs->iassign [ins->dreg] = new_dest;
2048                                         clob_dreg = ins->dreg;
2049                                         ins->dreg = new_dest;
2050                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
2051                                         need_ecx_spill = FALSE;
2052                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
2053                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
2054                                         rs->iassign [ins->dreg] = val;
2055                                         rs->isymbolic [val] = prev_dreg;
2056                                         ins->dreg = val;*/
2057                                 }
2058                                 if (is_global_ireg (ins->sreg2)) {
2059                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2060                                         insert_before_ins (ins, tmp, copy);
2061                                 }
2062                                 else {
2063                                         val = rs->iassign [ins->sreg2];
2064                                         if (val >= 0 && val != X86_ECX) {
2065                                                 MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
2066                                                 DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
2067                                                 move->next = ins;
2068                                                 g_assert_not_reached ();
2069                                                 /* FIXME: where is move connected to the instruction list? */
2070                                                 //tmp->prev->data->next = move;
2071                                         }
2072                                         else {
2073                                                 if (val == X86_ECX)
2074                                                 need_ecx_spill = FALSE;
2075                                         }
2076                                 }
2077                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
2078                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
2079                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
2080                                         mono_regstate_free_int (rs, X86_ECX);
2081                                 }
2082                                 if (!is_global_ireg (ins->sreg2))
2083                                         /* force-set sreg2 */
2084                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2085                         }
2086                         ins->sreg2 = X86_ECX;
2087                 } else if (spec [MONO_INST_CLOB] == 'd') {
2088                         /*
2089                          * DIVISION/REMAINER
2090                          */
2091                         int dest_reg = X86_EAX;
2092                         int clob_reg = X86_EDX;
2093                         if (spec [MONO_INST_DEST] == 'd') {
2094                                 dest_reg = X86_EDX; /* reminder */
2095                                 clob_reg = X86_EAX;
2096                         }
2097                         if (is_global_ireg (ins->dreg))
2098                                 val = ins->dreg;
2099                         else
2100                                 val = rs->iassign [ins->dreg];
2101                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
2102                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2103                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2104                                 mono_regstate_free_int (rs, dest_reg);
2105                         }
2106                         if (val < 0) {
2107                                 if (val < -1) {
2108                                         /* the register gets spilled after this inst */
2109                                         int spill = -val -1;
2110                                         dest_mask = 1 << dest_reg;
2111                                         prev_dreg = ins->dreg;
2112                                         val = mono_regstate_alloc_int (rs, dest_mask);
2113                                         if (val < 0)
2114                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
2115                                         rs->iassign [ins->dreg] = val;
2116                                         if (spill)
2117                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
2118                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2119                                         rs->isymbolic [val] = prev_dreg;
2120                                         ins->dreg = val;
2121                                 } else {
2122                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
2123                                         prev_dreg = ins->dreg;
2124                                         assign_ireg (rs, ins->dreg, dest_reg);
2125                                         ins->dreg = dest_reg;
2126                                         val = dest_reg;
2127                                 }
2128                         }
2129
2130                         //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
2131                         if (val != dest_reg) { /* force a copy */
2132                                 create_copy_ins (cfg, val, dest_reg, ins);
2133                                 if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
2134                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2135                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2136                                         mono_regstate_free_int (rs, dest_reg);
2137                                 }
2138                         }
2139                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
2140                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2141                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2142                                 mono_regstate_free_int (rs, clob_reg);
2143                         }
2144                         src1_mask = 1 << X86_EAX;
2145                         src2_mask = 1 << X86_ECX;
2146                 } else if (spec [MONO_INST_DEST] == 'l') {
2147                         int hreg;
2148                         val = rs->iassign [ins->dreg];
2149                         /* check special case when dreg have been moved from ecx (clob shift) */
2150                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2151                                 hreg = clob_dreg + 1;
2152                         else
2153                                 hreg = ins->dreg + 1;
2154
2155                         /* base prev_dreg on fixed hreg, handle clob case */
2156                         val = hreg - 1;
2157
2158                         if (val != rs->isymbolic [X86_EAX] && !(rs->ifree_mask & (1 << X86_EAX))) {
2159                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [X86_EAX]));
2160                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2161                                 mono_regstate_free_int (rs, X86_EAX);
2162                         }
2163                         if (hreg != rs->isymbolic [X86_EDX] && !(rs->ifree_mask & (1 << X86_EDX))) {
2164                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [X86_EDX]));
2165                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
2166                                 mono_regstate_free_int (rs, X86_EDX);
2167                         }
2168                 } else if (spec [MONO_INST_CLOB] == 'b') {
2169                         /*
2170                          * x86_set_reg instructions, dreg needs to be EAX..EDX
2171                          */     
2172                         dest_mask = (1 << X86_EAX) | (1 << X86_EBX) | (1 << X86_ECX) | (1 << X86_EDX);
2173                         if ((ins->dreg < MONO_MAX_IREGS) && (! (dest_mask & (1 << ins->dreg)))) {
2174                                 /* 
2175                                  * ins->dreg is already a hard reg, need to allocate another
2176                                  * suitable hard reg and make a copy.
2177                                  */
2178                                 int new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2179                                 g_assert (new_dest >= 0);
2180
2181                                 create_copy_ins (cfg, ins->dreg, new_dest, ins);
2182                                 DEBUG (g_print ("\tclob:b changing dreg R%d to %s\n", ins->dreg, mono_arch_regname (new_dest)));
2183                                 ins->dreg = new_dest;
2184
2185                                 /* The hard reg is no longer needed */
2186                                 mono_regstate_free_int (rs, new_dest);
2187                         }
2188                 }
2189
2190                 /*
2191                  * TRACK DREG
2192                  */
2193                 if (spec [MONO_INST_DEST] == 'f') {
2194                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
2195                                 GList *spill_node;
2196                                 MonoInst *store;
2197                                 spill_node = g_list_first (fspill_list);
2198                                 g_assert (spill_node);
2199
2200                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
2201                                 insert_before_ins (ins, tmp, store);
2202                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
2203                                 fspill--;
2204                         }
2205                 } else if (spec [MONO_INST_DEST] == 'L') {
2206                         int hreg;
2207                         val = rs->iassign [ins->dreg];
2208                         /* check special case when dreg have been moved from ecx (clob shift) */
2209                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2210                                 hreg = clob_dreg + 1;
2211                         else
2212                                 hreg = ins->dreg + 1;
2213
2214                         /* base prev_dreg on fixed hreg, handle clob case */
2215                         prev_dreg = hreg - 1;
2216
2217                         if (val < 0) {
2218                                 int spill = 0;
2219                                 if (val < -1) {
2220                                         /* the register gets spilled after this inst */
2221                                         spill = -val -1;
2222                                 }
2223                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2224                                 rs->iassign [ins->dreg] = val;
2225                                 if (spill)
2226                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2227                         }
2228
2229                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
2230  
2231                         rs->isymbolic [val] = hreg - 1;
2232                         ins->dreg = val;
2233                         
2234                         val = rs->iassign [hreg];
2235                         if (val < 0) {
2236                                 int spill = 0;
2237                                 if (val < -1) {
2238                                         /* the register gets spilled after this inst */
2239                                         spill = -val -1;
2240                                 }
2241                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2242                                 rs->iassign [hreg] = val;
2243                                 if (spill)
2244                                         create_spilled_store (cfg, spill, val, hreg, ins);
2245                         }
2246
2247                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
2248                         rs->isymbolic [val] = hreg;
2249                         /* save reg allocating into unused */
2250                         ins->unused = val;
2251
2252                         /* check if we can free our long reg */
2253                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2254                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
2255                                 mono_regstate_free_int (rs, val);
2256                         }
2257                 }
2258                 else if (ins->dreg >= MONO_MAX_IREGS) {
2259                         int hreg;
2260                         val = rs->iassign [ins->dreg];
2261                         if (spec [MONO_INST_DEST] == 'l') {
2262                                 /* check special case when dreg have been moved from ecx (clob shift) */
2263                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2264                                         hreg = clob_dreg + 1;
2265                                 else
2266                                         hreg = ins->dreg + 1;
2267
2268                                 /* base prev_dreg on fixed hreg, handle clob case */
2269                                 prev_dreg = hreg - 1;
2270                         } else
2271                                 prev_dreg = ins->dreg;
2272
2273                         if (val < 0) {
2274                                 int spill = 0;
2275                                 if (val < -1) {
2276                                         /* the register gets spilled after this inst */
2277                                         spill = -val -1;
2278                                 }
2279                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2280                                 rs->iassign [ins->dreg] = val;
2281                                 if (spill)
2282                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2283                         }
2284                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2285                         rs->isymbolic [val] = prev_dreg;
2286                         ins->dreg = val;
2287                         /* handle cases where lreg needs to be eax:edx */
2288                         if (spec [MONO_INST_DEST] == 'l') {
2289                                 /* check special case when dreg have been moved from ecx (clob shift) */
2290                                 int hreg = prev_dreg + 1;
2291                                 val = rs->iassign [hreg];
2292                                 if (val < 0) {
2293                                         int spill = 0;
2294                                         if (val < -1) {
2295                                                 /* the register gets spilled after this inst */
2296                                                 spill = -val -1;
2297                                         }
2298                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2299                                         rs->iassign [hreg] = val;
2300                                         if (spill)
2301                                                 create_spilled_store (cfg, spill, val, hreg, ins);
2302                                 }
2303                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
2304                                 rs->isymbolic [val] = hreg;
2305                                 if (ins->dreg == X86_EAX) {
2306                                         if (val != X86_EDX)
2307                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2308                                 } else if (ins->dreg == X86_EDX) {
2309                                         if (val == X86_EAX) {
2310                                                 /* swap */
2311                                                 g_assert_not_reached ();
2312                                         } else {
2313                                                 /* two forced copies */
2314                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2315                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2316                                         }
2317                                 } else {
2318                                         if (val == X86_EDX) {
2319                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2320                                         } else {
2321                                                 /* two forced copies */
2322                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2323                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2324                                         }
2325                                 }
2326                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2327                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
2328                                         mono_regstate_free_int (rs, val);
2329                                 }
2330                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
2331                                 /* this instruction only outputs to EAX, need to copy */
2332                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2333                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
2334                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
2335                         }
2336                 }
2337                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
2338                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
2339                         mono_regstate_free_int (rs, ins->dreg);
2340                 }
2341                 /* put src1 in EAX if it needs to be */
2342                 if (spec [MONO_INST_SRC1] == 'a') {
2343                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
2344                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
2345                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2346                                 mono_regstate_free_int (rs, X86_EAX);
2347                         }
2348                         if (ins->sreg1 < MONO_MAX_IREGS) {
2349                                 /* The argument is already in a hard reg, need to copy */
2350                                 MonoInst *copy = create_copy_ins (cfg, X86_EAX, ins->sreg1, NULL);
2351                                 insert_before_ins (ins, tmp, copy);
2352                         }
2353                         else
2354                                 /* force-set sreg1 */
2355                                 assign_ireg (rs, ins->sreg1, X86_EAX);
2356                         ins->sreg1 = X86_EAX;
2357                 }
2358
2359                 /*
2360                  * TRACK SREG1
2361                  */
2362                 if (spec [MONO_INST_SRC1] == 'f') {
2363                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
2364                                 MonoInst *load;
2365                                 MonoInst *store = NULL;
2366
2367                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2368                                         GList *spill_node;
2369                                         spill_node = g_list_first (fspill_list);
2370                                         g_assert (spill_node);
2371
2372                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
2373                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2374                                 }
2375
2376                                 fspill++;
2377                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2378                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
2379                                 insert_before_ins (ins, tmp, load);
2380                                 if (store) 
2381                                         insert_before_ins (load, tmp, store);
2382                         }
2383                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
2384                         /* force source to be same as dest */
2385                         assign_ireg (rs, ins->sreg1, ins->dreg);
2386                         assign_ireg (rs, ins->sreg1 + 1, ins->unused);
2387
2388                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
2389                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
2390
2391                         ins->sreg1 = ins->dreg;
2392                         /* 
2393                          * No need for saving the reg, we know that src1=dest in this cases
2394                          * ins->inst_c0 = ins->unused;
2395                          */
2396                 }
2397                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
2398                         val = rs->iassign [ins->sreg1];
2399                         prev_sreg1 = ins->sreg1;
2400                         if (val < 0) {
2401                                 int spill = 0;
2402                                 if (val < -1) {
2403                                         /* the register gets spilled after this inst */
2404                                         spill = -val -1;
2405                                 }
2406                                 if (0 && ins->opcode == OP_MOVE) {
2407                                         /* 
2408                                          * small optimization: the dest register is already allocated
2409                                          * but the src one is not: we can simply assign the same register
2410                                          * here and peephole will get rid of the instruction later.
2411                                          * This optimization may interfere with the clobbering handling:
2412                                          * it removes a mov operation that will be added again to handle clobbering.
2413                                          * There are also some other issues that should with make testjit.
2414                                          */
2415                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2416                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2417                                         //g_assert (val >= 0);
2418                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2419                                 } else {
2420                                         //g_assert (val == -1); /* source cannot be spilled */
2421                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2422                                         rs->iassign [ins->sreg1] = val;
2423                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2424                                 }
2425                                 if (spill) {
2426                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2427                                         insert_before_ins (ins, tmp, store);
2428                                 }
2429                         }
2430                         rs->isymbolic [val] = prev_sreg1;
2431                         ins->sreg1 = val;
2432                 } else {
2433                         prev_sreg1 = -1;
2434                 }
2435                 /* handle clobbering of sreg1 */
2436                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2437                         MonoInst *sreg2_copy = NULL;
2438                         MonoInst *copy = NULL;
2439
2440                         if (ins->dreg == ins->sreg2) {
2441                                 /* 
2442                                  * copying sreg1 to dreg could clobber sreg2, so allocate a new
2443                                  * register for it.
2444                                  */
2445                                 int reg2 = 0;
2446
2447                                 reg2 = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->sreg2, 0);
2448
2449                                 DEBUG (g_print ("\tneed to copy sreg2 %s to reg %s\n", mono_arch_regname (ins->sreg2), mono_arch_regname (reg2)));
2450                                 sreg2_copy = create_copy_ins (cfg, reg2, ins->sreg2, NULL);
2451                                 prev_sreg2 = ins->sreg2 = reg2;
2452
2453                                 mono_regstate_free_int (rs, reg2);
2454                         }
2455
2456                         copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2457                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2458                         insert_before_ins (ins, tmp, copy);
2459
2460                         if (sreg2_copy)
2461                                 insert_before_ins (copy, tmp, sreg2_copy);
2462
2463                         /*
2464                          * Need to prevent sreg2 to be allocated to sreg1, since that
2465                          * would screw up the previous copy.
2466                          */
2467                         src2_mask &= ~ (1 << ins->sreg1);
2468                         /* we set sreg1 to dest as well */
2469                         prev_sreg1 = ins->sreg1 = ins->dreg;
2470                         src2_mask &= ~ (1 << ins->dreg);
2471                 }
2472
2473                 /*
2474                  * TRACK SREG2
2475                  */
2476                 if (spec [MONO_INST_SRC2] == 'f') {
2477                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2478                                 MonoInst *load;
2479                                 MonoInst *store = NULL;
2480
2481                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2482                                         GList *spill_node;
2483
2484                                         spill_node = g_list_first (fspill_list);
2485                                         g_assert (spill_node);
2486                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2487                                                 spill_node = g_list_next (spill_node);
2488         
2489                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2490                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2491                                 } 
2492                                 
2493                                 fspill++;
2494                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2495                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2496                                 insert_before_ins (ins, tmp, load);
2497                                 if (store) 
2498                                         insert_before_ins (load, tmp, store);
2499                         }
2500                 } 
2501                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2502                         val = rs->iassign [ins->sreg2];
2503                         prev_sreg2 = ins->sreg2;
2504                         if (val < 0) {
2505                                 int spill = 0;
2506                                 if (val < -1) {
2507                                         /* the register gets spilled after this inst */
2508                                         spill = -val -1;
2509                                 }
2510                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2511                                 rs->iassign [ins->sreg2] = val;
2512                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2513                                 if (spill)
2514                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2515                         }
2516                         rs->isymbolic [val] = prev_sreg2;
2517                         ins->sreg2 = val;
2518                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
2519                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
2520                         }
2521                 } else {
2522                         prev_sreg2 = -1;
2523                 }
2524
2525                 if (spec [MONO_INST_CLOB] == 'c') {
2526                         int j, s;
2527                         guint32 clob_mask = X86_CALLEE_REGS;
2528                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2529                                 s = 1 << j;
2530                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2531                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2532                                 }
2533                         }
2534                 }
2535                 if (spec [MONO_INST_CLOB] == 'a') {
2536                         guint32 clob_reg = X86_EAX;
2537                         if (!(rs->ifree_mask & (1 << clob_reg)) && (rs->isymbolic [clob_reg] >= 8)) {
2538                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2539                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2540                                 mono_regstate_free_int (rs, clob_reg);
2541                         }
2542                 }
2543                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2544                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2545                         mono_regstate_free_int (rs, ins->sreg1);
2546                 }
2547                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2548                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2549                         mono_regstate_free_int (rs, ins->sreg2);
2550                 }*/
2551         
2552                 //DEBUG (print_ins (i, ins));
2553                 /* this may result from a insert_before call */
2554                 if (!tmp->next)
2555                         bb->code = tmp->data;
2556                 tmp = tmp->next;
2557         }
2558
2559         g_free (reginfo);
2560         g_free (reginfof);
2561         g_list_free (fspill_list);
2562 }
2563
2564 static unsigned char*
2565 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
2566 {
2567         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2568         x86_fnstcw_membase(code, X86_ESP, 0);
2569         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
2570         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
2571         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
2572         x86_fldcw_membase (code, X86_ESP, 2);
2573         if (size == 8) {
2574                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2575                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2576                 x86_pop_reg (code, dreg);
2577                 /* FIXME: need the high register 
2578                  * x86_pop_reg (code, dreg_high);
2579                  */
2580         } else {
2581                 x86_push_reg (code, X86_EAX); // SP = SP - 4
2582                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
2583                 x86_pop_reg (code, dreg);
2584         }
2585         x86_fldcw_membase (code, X86_ESP, 0);
2586         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2587
2588         if (size == 1)
2589                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
2590         else if (size == 2)
2591                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
2592         return code;
2593 }
2594
2595 static unsigned char*
2596 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2597 {
2598         int sreg = tree->sreg1;
2599         int need_touch = FALSE;
2600
2601 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
2602         need_touch = TRUE;
2603 #endif
2604
2605         if (need_touch) {
2606                 guint8* br[5];
2607
2608                 /*
2609                  * Under Windows:
2610                  * If requested stack size is larger than one page,
2611                  * perform stack-touch operation
2612                  */
2613                 /*
2614                  * Generate stack probe code.
2615                  * Under Windows, it is necessary to allocate one page at a time,
2616                  * "touching" stack after each successful sub-allocation. This is
2617                  * because of the way stack growth is implemented - there is a
2618                  * guard page before the lowest stack page that is currently commited.
2619                  * Stack normally grows sequentially so OS traps access to the
2620                  * guard page and commits more pages when needed.
2621                  */
2622                 x86_test_reg_imm (code, sreg, ~0xFFF);
2623                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2624
2625                 br[2] = code; /* loop */
2626                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
2627                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
2628
2629                 /* 
2630                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
2631                  * that follows only initializes the last part of the area.
2632                  */
2633                 /* Same as the init code below with size==0x1000 */
2634                 if (tree->flags & MONO_INST_INIT) {
2635                         x86_push_reg (code, X86_EAX);
2636                         x86_push_reg (code, X86_ECX);
2637                         x86_push_reg (code, X86_EDI);
2638                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
2639                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
2640                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2641                         x86_cld (code);
2642                         x86_prefix (code, X86_REP_PREFIX);
2643                         x86_stosl (code);
2644                         x86_pop_reg (code, X86_EDI);
2645                         x86_pop_reg (code, X86_ECX);
2646                         x86_pop_reg (code, X86_EAX);
2647                 }
2648
2649                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2650                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2651                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2652                 x86_patch (br[3], br[2]);
2653                 x86_test_reg_reg (code, sreg, sreg);
2654                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2655                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2656
2657                 br[1] = code; x86_jump8 (code, 0);
2658
2659                 x86_patch (br[0], code);
2660                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2661                 x86_patch (br[1], code);
2662                 x86_patch (br[4], code);
2663         }
2664         else
2665                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
2666
2667         if (tree->flags & MONO_INST_INIT) {
2668                 int offset = 0;
2669                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
2670                         x86_push_reg (code, X86_EAX);
2671                         offset += 4;
2672                 }
2673                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
2674                         x86_push_reg (code, X86_ECX);
2675                         offset += 4;
2676                 }
2677                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
2678                         x86_push_reg (code, X86_EDI);
2679                         offset += 4;
2680                 }
2681                 
2682                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
2683                 if (sreg != X86_ECX)
2684                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
2685                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
2686                                 
2687                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
2688                 x86_cld (code);
2689                 x86_prefix (code, X86_REP_PREFIX);
2690                 x86_stosl (code);
2691                 
2692                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
2693                         x86_pop_reg (code, X86_EDI);
2694                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
2695                         x86_pop_reg (code, X86_ECX);
2696                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
2697                         x86_pop_reg (code, X86_EAX);
2698         }
2699         return code;
2700 }
2701
2702
2703 static guint8*
2704 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2705 {
2706         CallInfo *cinfo;
2707         int quad;
2708
2709         /* Move return value to the target register */
2710         switch (ins->opcode) {
2711         case CEE_CALL:
2712         case OP_CALL_REG:
2713         case OP_CALL_MEMBASE:
2714                 if (ins->dreg != X86_EAX)
2715                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2716                 break;
2717         case OP_VCALL:
2718         case OP_VCALL_REG:
2719         case OP_VCALL_MEMBASE:
2720                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
2721                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2722                         /* Pop the destination address from the stack */
2723                         x86_pop_reg (code, X86_ECX);
2724                         
2725                         for (quad = 0; quad < 2; quad ++) {
2726                                 switch (cinfo->ret.pair_storage [quad]) {
2727                                 case ArgInIReg:
2728                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
2729                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
2730                                         break;
2731                                 case ArgNone:
2732                                         break;
2733                                 default:
2734                                         g_assert_not_reached ();
2735                                 }
2736                         }
2737                 }
2738                 g_free (cinfo);
2739         default:
2740                 break;
2741         }
2742
2743         return code;
2744 }
2745
2746 static guint8*
2747 emit_tls_get (guint8* code, int dreg, int tls_offset)
2748 {
2749 #ifdef PLATFORM_WIN32
2750         /* 
2751          * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
2752          * Journal and/or a disassembly of the TlsGet () function.
2753          */
2754         g_assert (tls_offset < 64);
2755         x86_prefix (code, X86_FS_PREFIX);
2756         x86_mov_reg_mem (code, dreg, 0x18, 4);
2757         /* Dunno what this does but TlsGetValue () contains it */
2758         x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
2759         x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
2760 #else
2761         x86_prefix (code, X86_GS_PREFIX);
2762         x86_mov_reg_mem (code, dreg, tls_offset, 4);                    
2763 #endif
2764         return code;
2765 }
2766
2767 #define REAL_PRINT_REG(text,reg) \
2768 mono_assert (reg >= 0); \
2769 x86_push_reg (code, X86_EAX); \
2770 x86_push_reg (code, X86_EDX); \
2771 x86_push_reg (code, X86_ECX); \
2772 x86_push_reg (code, reg); \
2773 x86_push_imm (code, reg); \
2774 x86_push_imm (code, text " %d %p\n"); \
2775 x86_mov_reg_imm (code, X86_EAX, printf); \
2776 x86_call_reg (code, X86_EAX); \
2777 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
2778 x86_pop_reg (code, X86_ECX); \
2779 x86_pop_reg (code, X86_EDX); \
2780 x86_pop_reg (code, X86_EAX);
2781
2782 /* benchmark and set based on cpu */
2783 #define LOOP_ALIGNMENT 8
2784 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2785
2786 void
2787 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2788 {
2789         MonoInst *ins;
2790         MonoCallInst *call;
2791         guint offset;
2792         guint8 *code = cfg->native_code + cfg->code_len;
2793         MonoInst *last_ins = NULL;
2794         guint last_offset = 0;
2795         int max_len, cpos;
2796
2797         if (cfg->opt & MONO_OPT_PEEPHOLE)
2798                 peephole_pass (cfg, bb);
2799
2800         if (cfg->opt & MONO_OPT_LOOP) {
2801                 int pad, align = LOOP_ALIGNMENT;
2802                 /* set alignment depending on cpu */
2803                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2804                         pad = align - pad;
2805                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2806                         x86_padding (code, pad);
2807                         cfg->code_len += pad;
2808                         bb->native_offset = cfg->code_len;
2809                 }
2810         }
2811
2812         if (cfg->verbose_level > 2)
2813                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2814
2815         cpos = bb->max_offset;
2816
2817         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2818                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2819                 g_assert (!cfg->compile_aot);
2820                 cpos += 6;
2821
2822                 cov->data [bb->dfn].cil_code = bb->cil_code;
2823                 /* this is not thread save, but good enough */
2824                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2825         }
2826
2827         offset = code - cfg->native_code;
2828
2829         ins = bb->code;
2830         while (ins) {
2831                 offset = code - cfg->native_code;
2832
2833                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2834
2835                 if (offset > (cfg->code_size - max_len - 16)) {
2836                         cfg->code_size *= 2;
2837                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2838                         code = cfg->native_code + offset;
2839                         mono_jit_stats.code_reallocs++;
2840                 }
2841
2842                 mono_debug_record_line_number (cfg, ins, offset);
2843
2844                 switch (ins->opcode) {
2845                 case OP_BIGMUL:
2846                         x86_mul_reg (code, ins->sreg2, TRUE);
2847                         break;
2848                 case OP_BIGMUL_UN:
2849                         x86_mul_reg (code, ins->sreg2, FALSE);
2850                         break;
2851                 case OP_X86_SETEQ_MEMBASE:
2852                 case OP_X86_SETNE_MEMBASE:
2853                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2854                                          ins->inst_basereg, ins->inst_offset, TRUE);
2855                         break;
2856                 case OP_STOREI1_MEMBASE_IMM:
2857                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2858                         break;
2859                 case OP_STOREI2_MEMBASE_IMM:
2860                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2861                         break;
2862                 case OP_STORE_MEMBASE_IMM:
2863                 case OP_STOREI4_MEMBASE_IMM:
2864                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2865                         break;
2866                 case OP_STOREI1_MEMBASE_REG:
2867                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2868                         break;
2869                 case OP_STOREI2_MEMBASE_REG:
2870                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2871                         break;
2872                 case OP_STORE_MEMBASE_REG:
2873                 case OP_STOREI4_MEMBASE_REG:
2874                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2875                         break;
2876                 case CEE_LDIND_I:
2877                 case CEE_LDIND_I4:
2878                 case CEE_LDIND_U4:
2879                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2880                         break;
2881                 case OP_LOADU4_MEM:
2882                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2883                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2884                         break;
2885                 case OP_LOAD_MEMBASE:
2886                 case OP_LOADI4_MEMBASE:
2887                 case OP_LOADU4_MEMBASE:
2888                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2889                         break;
2890                 case OP_LOADU1_MEMBASE:
2891                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2892                         break;
2893                 case OP_LOADI1_MEMBASE:
2894                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2895                         break;
2896                 case OP_LOADU2_MEMBASE:
2897                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2898                         break;
2899                 case OP_LOADI2_MEMBASE:
2900                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2901                         break;
2902                 case CEE_CONV_I1:
2903                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2904                         break;
2905                 case CEE_CONV_I2:
2906                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2907                         break;
2908                 case CEE_CONV_U1:
2909                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2910                         break;
2911                 case CEE_CONV_U2:
2912                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2913                         break;
2914                 case OP_COMPARE:
2915                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2916                         break;
2917                 case OP_COMPARE_IMM:
2918                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2919                         break;
2920                 case OP_X86_COMPARE_MEMBASE_REG:
2921                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2922                         break;
2923                 case OP_X86_COMPARE_MEMBASE_IMM:
2924                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2925                         break;
2926                 case OP_X86_COMPARE_MEMBASE8_IMM:
2927                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2928                         break;
2929                 case OP_X86_COMPARE_REG_MEMBASE:
2930                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2931                         break;
2932                 case OP_X86_COMPARE_MEM_IMM:
2933                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2934                         break;
2935                 case OP_X86_TEST_NULL:
2936                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2937                         break;
2938                 case OP_X86_ADD_MEMBASE_IMM:
2939                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2940                         break;
2941                 case OP_X86_ADD_MEMBASE:
2942                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2943                         break;
2944                 case OP_X86_SUB_MEMBASE_IMM:
2945                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2946                         break;
2947                 case OP_X86_SUB_MEMBASE:
2948                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2949                         break;
2950                 case OP_X86_INC_MEMBASE:
2951                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2952                         break;
2953                 case OP_X86_INC_REG:
2954                         x86_inc_reg (code, ins->dreg);
2955                         break;
2956                 case OP_X86_DEC_MEMBASE:
2957                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2958                         break;
2959                 case OP_X86_DEC_REG:
2960                         x86_dec_reg (code, ins->dreg);
2961                         break;
2962                 case OP_X86_MUL_MEMBASE:
2963                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2964                         break;
2965                 case CEE_BREAK:
2966                         x86_breakpoint (code);
2967                         break;
2968                 case OP_ADDCC:
2969                 case CEE_ADD:
2970                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2971                         break;
2972                 case OP_ADC:
2973                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2974                         break;
2975                 case OP_ADDCC_IMM:
2976                 case OP_ADD_IMM:
2977                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2978                         break;
2979                 case OP_ADC_IMM:
2980                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2981                         break;
2982                 case OP_SUBCC:
2983                 case CEE_SUB:
2984                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2985                         break;
2986                 case OP_SBB:
2987                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2988                         break;
2989                 case OP_SUBCC_IMM:
2990                 case OP_SUB_IMM:
2991                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2992                         break;
2993                 case OP_SBB_IMM:
2994                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2995                         break;
2996                 case CEE_AND:
2997                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2998                         break;
2999                 case OP_AND_IMM:
3000                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
3001                         break;
3002                 case CEE_DIV:
3003                         x86_cdq (code);
3004                         x86_div_reg (code, ins->sreg2, TRUE);
3005                         break;
3006                 case CEE_DIV_UN:
3007                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
3008                         x86_div_reg (code, ins->sreg2, FALSE);
3009                         break;
3010                 case OP_DIV_IMM:
3011                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3012                         x86_cdq (code);
3013                         x86_div_reg (code, ins->sreg2, TRUE);
3014                         break;
3015                 case CEE_REM:
3016                         x86_cdq (code);
3017                         x86_div_reg (code, ins->sreg2, TRUE);
3018                         break;
3019                 case CEE_REM_UN:
3020                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
3021                         x86_div_reg (code, ins->sreg2, FALSE);
3022                         break;
3023                 case OP_REM_IMM:
3024                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3025                         x86_cdq (code);
3026                         x86_div_reg (code, ins->sreg2, TRUE);
3027                         break;
3028                 case CEE_OR:
3029                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
3030                         break;
3031                 case OP_OR_IMM:
3032                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
3033                         break;
3034                 case CEE_XOR:
3035                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
3036                         break;
3037                 case OP_XOR_IMM:
3038                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
3039                         break;
3040                 case CEE_SHL:
3041                         g_assert (ins->sreg2 == X86_ECX);
3042                         x86_shift_reg (code, X86_SHL, ins->dreg);
3043                         break;
3044                 case CEE_SHR:
3045                         g_assert (ins->sreg2 == X86_ECX);
3046                         x86_shift_reg (code, X86_SAR, ins->dreg);
3047                         break;
3048                 case OP_SHR_IMM:
3049                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
3050                         break;
3051                 case OP_SHR_UN_IMM:
3052                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
3053                         break;
3054                 case CEE_SHR_UN:
3055                         g_assert (ins->sreg2 == X86_ECX);
3056                         x86_shift_reg (code, X86_SHR, ins->dreg);
3057                         break;
3058                 case OP_SHL_IMM:
3059                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
3060                         break;
3061                 case OP_LSHL: {
3062                         guint8 *jump_to_end;
3063
3064                         /* handle shifts below 32 bits */
3065                         x86_shld_reg (code, ins->unused, ins->sreg1);
3066                         x86_shift_reg (code, X86_SHL, ins->sreg1);
3067
3068                         x86_test_reg_imm (code, X86_ECX, 32);
3069                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3070
3071                         /* handle shift over 32 bit */
3072                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3073                         x86_clear_reg (code, ins->sreg1);
3074                         
3075                         x86_patch (jump_to_end, code);
3076                         }
3077                         break;
3078                 case OP_LSHR: {
3079                         guint8 *jump_to_end;
3080
3081                         /* handle shifts below 32 bits */
3082                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3083                         x86_shift_reg (code, X86_SAR, ins->unused);
3084
3085                         x86_test_reg_imm (code, X86_ECX, 32);
3086                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3087
3088                         /* handle shifts over 31 bits */
3089                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3090                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
3091                         
3092                         x86_patch (jump_to_end, code);
3093                         }
3094                         break;
3095                 case OP_LSHR_UN: {
3096                         guint8 *jump_to_end;
3097
3098                         /* handle shifts below 32 bits */
3099                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3100                         x86_shift_reg (code, X86_SHR, ins->unused);
3101
3102                         x86_test_reg_imm (code, X86_ECX, 32);
3103                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3104
3105                         /* handle shifts over 31 bits */
3106                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3107                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
3108                         
3109                         x86_patch (jump_to_end, code);
3110                         }
3111                         break;
3112                 case OP_LSHL_IMM:
3113                         if (ins->inst_imm >= 32) {
3114                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3115                                 x86_clear_reg (code, ins->sreg1);
3116                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
3117                         } else {
3118                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
3119                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
3120                         }
3121                         break;
3122                 case OP_LSHR_IMM:
3123                         if (ins->inst_imm >= 32) {
3124                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
3125                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
3126                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
3127                         } else {
3128                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3129                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
3130                         }
3131                         break;
3132                 case OP_LSHR_UN_IMM:
3133                         if (ins->inst_imm >= 32) {
3134                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3135                                 x86_clear_reg (code, ins->unused);
3136                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
3137                         } else {
3138                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3139                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
3140                         }
3141                         break;
3142                 case CEE_NOT:
3143                         x86_not_reg (code, ins->sreg1);
3144                         break;
3145                 case CEE_NEG:
3146                         x86_neg_reg (code, ins->sreg1);
3147                         break;
3148                 case OP_SEXT_I1:
3149                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3150                         break;
3151                 case OP_SEXT_I2:
3152                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3153                         break;
3154                 case CEE_MUL:
3155                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3156                         break;
3157                 case OP_MUL_IMM:
3158                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
3159                         break;
3160                 case CEE_MUL_OVF:
3161                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3162                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3163                         break;
3164                 case CEE_MUL_OVF_UN: {
3165                         /* the mul operation and the exception check should most likely be split */
3166                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3167                         /*g_assert (ins->sreg2 == X86_EAX);
3168                         g_assert (ins->dreg == X86_EAX);*/
3169                         if (ins->sreg2 == X86_EAX) {
3170                                 non_eax_reg = ins->sreg1;
3171                         } else if (ins->sreg1 == X86_EAX) {
3172                                 non_eax_reg = ins->sreg2;
3173                         } else {
3174                                 /* no need to save since we're going to store to it anyway */
3175                                 if (ins->dreg != X86_EAX) {
3176                                         saved_eax = TRUE;
3177                                         x86_push_reg (code, X86_EAX);
3178                                 }
3179                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
3180                                 non_eax_reg = ins->sreg2;
3181                         }
3182                         if (ins->dreg == X86_EDX) {
3183                                 if (!saved_eax) {
3184                                         saved_eax = TRUE;
3185                                         x86_push_reg (code, X86_EAX);
3186                                 }
3187                         } else if (ins->dreg != X86_EAX) {
3188                                 saved_edx = TRUE;
3189                                 x86_push_reg (code, X86_EDX);
3190                         }
3191                         x86_mul_reg (code, non_eax_reg, FALSE);
3192                         /* save before the check since pop and mov don't change the flags */
3193                         if (ins->dreg != X86_EAX)
3194                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3195                         if (saved_edx)
3196                                 x86_pop_reg (code, X86_EDX);
3197                         if (saved_eax)
3198                                 x86_pop_reg (code, X86_EAX);
3199                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3200                         break;
3201                 }
3202                 case OP_ICONST:
3203                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
3204                         break;
3205                 case OP_AOTCONST:
3206                         g_assert_not_reached ();
3207                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
3208                         x86_mov_reg_imm (code, ins->dreg, 0);
3209                         break;
3210                 case OP_LOAD_GOTADDR:
3211                         x86_call_imm (code, 0);
3212                         /* 
3213                          * The patch needs to point to the pop, since the GOT offset needs 
3214                          * to be added to that address.
3215                          */
3216                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3217                         x86_pop_reg (code, ins->dreg);
3218                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
3219                         break;
3220                 case OP_GOT_ENTRY:
3221                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3222                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
3223                         break;
3224                 case OP_X86_PUSH_GOT_ENTRY:
3225                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3226                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
3227                         break;
3228                 case CEE_CONV_I4:
3229                 case OP_MOVE:
3230                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3231                         break;
3232                 case CEE_CONV_U4:
3233                         g_assert_not_reached ();
3234                 case CEE_JMP: {
3235                         /*
3236                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3237                          * Keep in sync with the code in emit_epilog.
3238                          */
3239                         int pos = 0;
3240
3241                         /* FIXME: no tracing support... */
3242                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3243                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3244                         /* reset offset to make max_len work */
3245                         offset = code - cfg->native_code;
3246
3247                         g_assert (!cfg->method->save_lmf);
3248
3249                         if (cfg->used_int_regs & (1 << X86_EBX))
3250                                 pos -= 4;
3251                         if (cfg->used_int_regs & (1 << X86_EDI))
3252                                 pos -= 4;
3253                         if (cfg->used_int_regs & (1 << X86_ESI))
3254                                 pos -= 4;
3255                         if (pos)
3256                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3257         
3258                         if (cfg->used_int_regs & (1 << X86_ESI))
3259                                 x86_pop_reg (code, X86_ESI);
3260                         if (cfg->used_int_regs & (1 << X86_EDI))
3261                                 x86_pop_reg (code, X86_EDI);
3262                         if (cfg->used_int_regs & (1 << X86_EBX))
3263                                 x86_pop_reg (code, X86_EBX);
3264         
3265                         /* restore ESP/EBP */
3266                         x86_leave (code);
3267                         offset = code - cfg->native_code;
3268                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3269                         x86_jump32 (code, 0);
3270                         break;
3271                 }
3272                 case OP_CHECK_THIS:
3273                         /* ensure ins->sreg1 is not NULL
3274                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
3275                          * cmp DWORD PTR [eax], 0
3276                          */
3277                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
3278                         break;
3279                 case OP_ARGLIST: {
3280                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
3281                         x86_push_reg (code, hreg);
3282                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
3283                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
3284                         x86_pop_reg (code, hreg);
3285                         break;
3286                 }
3287                 case OP_FCALL:
3288                 case OP_LCALL:
3289                 case OP_VCALL:
3290                 case OP_VOIDCALL:
3291                 case CEE_CALL:
3292                         call = (MonoCallInst*)ins;
3293                         if (ins->flags & MONO_INST_HAS_METHOD)
3294                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
3295                         else
3296                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
3297                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3298                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
3299                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
3300                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
3301                                  * smart enough to do that optimization yet
3302                                  *
3303                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
3304                                  * mcs botstrap slow down. However, doing 1 pop for 4 bytes creates a small,
3305                                  * (most likely from locality benefits). People with other processors should
3306                                  * check on theirs to see what happens.
3307                                  */
3308                                 if (call->stack_usage == 4) {
3309                                         /* we want to use registers that won't get used soon, so use
3310                                          * ecx, as eax will get allocated first. edx is used by long calls,
3311                                          * so we can't use that.
3312                                          */
3313                                         
3314                                         x86_pop_reg (code, X86_ECX);
3315                                 } else {
3316                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3317                                 }
3318                         }
3319                         code = emit_move_return_value (cfg, ins, code);
3320                         break;
3321                 case OP_FCALL_REG:
3322                 case OP_LCALL_REG:
3323                 case OP_VCALL_REG:
3324                 case OP_VOIDCALL_REG:
3325                 case OP_CALL_REG:
3326                         call = (MonoCallInst*)ins;
3327                         x86_call_reg (code, ins->sreg1);
3328                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3329                                 if (call->stack_usage == 4)
3330                                         x86_pop_reg (code, X86_ECX);
3331                                 else
3332                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3333                         }
3334                         code = emit_move_return_value (cfg, ins, code);
3335                         break;
3336                 case OP_FCALL_MEMBASE:
3337                 case OP_LCALL_MEMBASE:
3338                 case OP_VCALL_MEMBASE:
3339                 case OP_VOIDCALL_MEMBASE:
3340                 case OP_CALL_MEMBASE:
3341                         call = (MonoCallInst*)ins;
3342                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
3343                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3344                                 if (call->stack_usage == 4)
3345                                         x86_pop_reg (code, X86_ECX);
3346                                 else
3347                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3348                         }
3349                         code = emit_move_return_value (cfg, ins, code);
3350                         break;
3351                 case OP_OUTARG:
3352                 case OP_X86_PUSH:
3353                         x86_push_reg (code, ins->sreg1);
3354                         break;
3355                 case OP_X86_PUSH_IMM:
3356                         x86_push_imm (code, ins->inst_imm);
3357                         break;
3358                 case OP_X86_PUSH_MEMBASE:
3359                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
3360                         break;
3361                 case OP_X86_PUSH_OBJ: 
3362                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
3363                         x86_push_reg (code, X86_EDI);
3364                         x86_push_reg (code, X86_ESI);
3365                         x86_push_reg (code, X86_ECX);
3366                         if (ins->inst_offset)
3367                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
3368                         else
3369                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
3370                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
3371                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
3372                         x86_cld (code);
3373                         x86_prefix (code, X86_REP_PREFIX);
3374                         x86_movsd (code);
3375                         x86_pop_reg (code, X86_ECX);
3376                         x86_pop_reg (code, X86_ESI);
3377                         x86_pop_reg (code, X86_EDI);
3378                         break;
3379                 case OP_X86_LEA:
3380                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
3381                         break;
3382                 case OP_X86_LEA_MEMBASE:
3383                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
3384                         break;
3385                 case OP_X86_XCHG:
3386                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
3387                         break;
3388                 case OP_LOCALLOC:
3389                         /* keep alignment */
3390                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
3391                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
3392                         code = mono_emit_stack_alloc (code, ins);
3393                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
3394                         break;
3395                 case CEE_RET:
3396                         x86_ret (code);
3397                         break;
3398                 case CEE_THROW: {
3399                         x86_push_reg (code, ins->sreg1);
3400                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3401                                                           (gpointer)"mono_arch_throw_exception");
3402                         break;
3403                 }
3404                 case OP_RETHROW: {
3405                         x86_push_reg (code, ins->sreg1);
3406                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3407                                                           (gpointer)"mono_arch_rethrow_exception");
3408                         break;
3409                 }
3410                 case OP_CALL_HANDLER: 
3411                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3412                         x86_call_imm (code, 0);
3413                         break;
3414                 case OP_LABEL:
3415                         ins->inst_c0 = code - cfg->native_code;
3416                         break;
3417                 case CEE_BR:
3418                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
3419                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
3420                         //break;
3421                         if (ins->flags & MONO_INST_BRLABEL) {
3422                                 if (ins->inst_i0->inst_c0) {
3423                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
3424                                 } else {
3425                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
3426                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3427                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
3428                                                 x86_jump8 (code, 0);
3429                                         else 
3430                                                 x86_jump32 (code, 0);
3431                                 }
3432                         } else {
3433                                 if (ins->inst_target_bb->native_offset) {
3434                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
3435                                 } else {
3436                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3437                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3438                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3439                                                 x86_jump8 (code, 0);
3440                                         else 
3441                                                 x86_jump32 (code, 0);
3442                                 } 
3443                         }
3444                         break;
3445                 case OP_BR_REG:
3446                         x86_jump_reg (code, ins->sreg1);
3447                         break;
3448                 case OP_CEQ:
3449                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3450                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3451                         break;
3452                 case OP_CLT:
3453                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
3454                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3455                         break;
3456                 case OP_CLT_UN:
3457                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3458                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3459                         break;
3460                 case OP_CGT:
3461                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
3462                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3463                         break;
3464                 case OP_CGT_UN:
3465                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3466                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3467                         break;
3468                 case OP_CNE:
3469                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
3470                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3471                         break;
3472                 case OP_COND_EXC_EQ:
3473                 case OP_COND_EXC_NE_UN:
3474                 case OP_COND_EXC_LT:
3475                 case OP_COND_EXC_LT_UN:
3476                 case OP_COND_EXC_GT:
3477                 case OP_COND_EXC_GT_UN:
3478                 case OP_COND_EXC_GE:
3479                 case OP_COND_EXC_GE_UN:
3480                 case OP_COND_EXC_LE:
3481                 case OP_COND_EXC_LE_UN:
3482                 case OP_COND_EXC_OV:
3483                 case OP_COND_EXC_NO:
3484                 case OP_COND_EXC_C:
3485                 case OP_COND_EXC_NC:
3486                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
3487                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3488                         break;
3489                 case CEE_BEQ:
3490                 case CEE_BNE_UN:
3491                 case CEE_BLT:
3492                 case CEE_BLT_UN:
3493                 case CEE_BGT:
3494                 case CEE_BGT_UN:
3495                 case CEE_BGE:
3496                 case CEE_BGE_UN:
3497                 case CEE_BLE:
3498                 case CEE_BLE_UN:
3499                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
3500                         break;
3501
3502                 /* floating point opcodes */
3503                 case OP_R8CONST: {
3504                         double d = *(double *)ins->inst_p0;
3505
3506                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
3507                                 x86_fldz (code);
3508                         } else if (d == 1.0) {
3509                                 x86_fld1 (code);
3510                         } else {
3511                                 if (cfg->compile_aot) {
3512                                         guint32 *val = (guint32*)&d;
3513                                         x86_push_imm (code, val [1]);
3514                                         x86_push_imm (code, val [0]);
3515                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
3516                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3517                                 }
3518                                 else {
3519                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
3520                                         x86_fld (code, NULL, TRUE);
3521                                 }
3522                         }
3523                         break;
3524                 }
3525                 case OP_R4CONST: {
3526                         float f = *(float *)ins->inst_p0;
3527
3528                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
3529                                 x86_fldz (code);
3530                         } else if (f == 1.0) {
3531                                 x86_fld1 (code);
3532                         } else {
3533                                 if (cfg->compile_aot) {
3534                                         guint32 val = *(guint32*)&f;
3535                                         x86_push_imm (code, val);
3536                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
3537                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3538                                 }
3539                                 else {
3540                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
3541                                         x86_fld (code, NULL, FALSE);
3542                                 }
3543                         }
3544                         break;
3545                 }
3546                 case OP_STORER8_MEMBASE_REG:
3547                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3548                         break;
3549                 case OP_LOADR8_SPILL_MEMBASE:
3550                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3551                         x86_fxch (code, 1);
3552                         break;
3553                 case OP_LOADR8_MEMBASE:
3554                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3555                         break;
3556                 case OP_STORER4_MEMBASE_REG:
3557                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3558                         break;
3559                 case OP_LOADR4_MEMBASE:
3560                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3561                         break;
3562                 case CEE_CONV_R4: /* FIXME: change precision */
3563                 case CEE_CONV_R8:
3564                         x86_push_reg (code, ins->sreg1);
3565                         x86_fild_membase (code, X86_ESP, 0, FALSE);
3566                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3567                         break;
3568                 case OP_X86_FP_LOAD_I8:
3569                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3570                         break;
3571                 case OP_X86_FP_LOAD_I4:
3572                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3573                         break;
3574                 case OP_FCONV_TO_I1:
3575                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3576                         break;
3577                 case OP_FCONV_TO_U1:
3578                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3579                         break;
3580                 case OP_FCONV_TO_I2:
3581                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3582                         break;
3583                 case OP_FCONV_TO_U2:
3584                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3585                         break;
3586                 case OP_FCONV_TO_I4:
3587                 case OP_FCONV_TO_I:
3588                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3589                         break;
3590                 case OP_FCONV_TO_I8:
3591                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3592                         x86_fnstcw_membase(code, X86_ESP, 0);
3593                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3594                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3595                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3596                         x86_fldcw_membase (code, X86_ESP, 2);
3597                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3598                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3599                         x86_pop_reg (code, ins->dreg);
3600                         x86_pop_reg (code, ins->unused);
3601                         x86_fldcw_membase (code, X86_ESP, 0);
3602                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3603                         break;
3604                 case OP_LCONV_TO_R_UN: { 
3605                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
3606                         guint8 *br;
3607
3608                         /* load 64bit integer to FP stack */
3609                         x86_push_imm (code, 0);
3610                         x86_push_reg (code, ins->sreg2);
3611                         x86_push_reg (code, ins->sreg1);
3612                         x86_fild_membase (code, X86_ESP, 0, TRUE);
3613                         /* store as 80bit FP value */
3614                         x86_fst80_membase (code, X86_ESP, 0);
3615                         
3616                         /* test if lreg is negative */
3617                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3618                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3619         
3620                         /* add correction constant mn */
3621                         x86_fld80_mem (code, mn);
3622                         x86_fld80_membase (code, X86_ESP, 0);
3623                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3624                         x86_fst80_membase (code, X86_ESP, 0);
3625
3626                         x86_patch (br, code);
3627
3628                         x86_fld80_membase (code, X86_ESP, 0);
3629                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
3630
3631                         break;
3632                 }
3633                 case OP_LCONV_TO_OVF_I: {
3634                         guint8 *br [3], *label [1];
3635
3636                         /* 
3637                          * Valid ints: 0xffffffff:8000000 to 00000000:0x7f000000
3638                          */
3639                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3640
3641                         /* If the low word top bit is set, see if we are negative */
3642                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3643                         /* We are not negative (no top bit set, check for our top word to be zero */
3644                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3645                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3646                         label [0] = code;
3647
3648                         /* throw exception */
3649                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3650                         x86_jump32 (code, 0);
3651         
3652                         x86_patch (br [0], code);
3653                         /* our top bit is set, check that top word is 0xfffffff */
3654                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3655                 
3656                         x86_patch (br [1], code);
3657                         /* nope, emit exception */
3658                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3659                         x86_patch (br [2], label [0]);
3660
3661                         if (ins->dreg != ins->sreg1)
3662                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3663                         break;
3664                 }
3665                 case OP_FADD:
3666                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3667                         break;
3668                 case OP_FSUB:
3669                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3670                         break;          
3671                 case OP_FMUL:
3672                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3673                         break;          
3674                 case OP_FDIV:
3675                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3676                         break;          
3677                 case OP_FNEG:
3678                         x86_fchs (code);
3679                         break;          
3680                 case OP_SIN:
3681                         x86_fsin (code);
3682                         x86_fldz (code);
3683                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3684                         break;          
3685                 case OP_COS:
3686                         x86_fcos (code);
3687                         x86_fldz (code);
3688                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3689                         break;          
3690                 case OP_ABS:
3691                         x86_fabs (code);
3692                         break;          
3693                 case OP_TAN: {
3694                         /* 
3695                          * it really doesn't make sense to inline all this code,
3696                          * it's here just to show that things may not be as simple 
3697                          * as they appear.
3698                          */
3699                         guchar *check_pos, *end_tan, *pop_jump;
3700                         x86_push_reg (code, X86_EAX);
3701                         x86_fptan (code);
3702                         x86_fnstsw (code);
3703                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3704                         check_pos = code;
3705                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3706                         x86_fstp (code, 0); /* pop the 1.0 */
3707                         end_tan = code;
3708                         x86_jump8 (code, 0);
3709                         x86_fldpi (code);
3710                         x86_fp_op (code, X86_FADD, 0);
3711                         x86_fxch (code, 1);
3712                         x86_fprem1 (code);
3713                         x86_fstsw (code);
3714                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3715                         pop_jump = code;
3716                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3717                         x86_fstp (code, 1);
3718                         x86_fptan (code);
3719                         x86_patch (pop_jump, code);
3720                         x86_fstp (code, 0); /* pop the 1.0 */
3721                         x86_patch (check_pos, code);
3722                         x86_patch (end_tan, code);
3723                         x86_fldz (code);
3724                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3725                         x86_pop_reg (code, X86_EAX);
3726                         break;
3727                 }
3728                 case OP_ATAN:
3729                         x86_fld1 (code);
3730                         x86_fpatan (code);
3731                         x86_fldz (code);
3732                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3733                         break;          
3734                 case OP_SQRT:
3735                         x86_fsqrt (code);
3736                         break;          
3737                 case OP_X86_FPOP:
3738                         x86_fstp (code, 0);
3739                         break;          
3740                 case OP_FREM: {
3741                         guint8 *l1, *l2;
3742
3743                         x86_push_reg (code, X86_EAX);
3744                         /* we need to exchange ST(0) with ST(1) */
3745                         x86_fxch (code, 1);
3746
3747                         /* this requires a loop, because fprem somtimes 
3748                          * returns a partial remainder */
3749                         l1 = code;
3750                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3751                         /* x86_fprem1 (code); */
3752                         x86_fprem (code);
3753                         x86_fnstsw (code);
3754                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3755                         l2 = code + 2;
3756                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3757
3758                         /* pop result */
3759                         x86_fstp (code, 1);
3760
3761                         x86_pop_reg (code, X86_EAX);
3762                         break;
3763                 }
3764                 case OP_FCOMPARE:
3765                         if (cfg->opt & MONO_OPT_FCMOV) {
3766                                 x86_fcomip (code, 1);
3767                                 x86_fstp (code, 0);
3768                                 break;
3769                         }
3770                         /* this overwrites EAX */
3771                         EMIT_FPCOMPARE(code);
3772                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3773                         break;
3774                 case OP_FCEQ:
3775                         if (cfg->opt & MONO_OPT_FCMOV) {
3776                                 /* zeroing the register at the start results in 
3777                                  * shorter and faster code (we can also remove the widening op)
3778                                  */
3779                                 guchar *unordered_check;
3780                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3781                                 x86_fcomip (code, 1);
3782                                 x86_fstp (code, 0);
3783                                 unordered_check = code;
3784                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3785                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3786                                 x86_patch (unordered_check, code);
3787                                 break;
3788                         }
3789                         if (ins->dreg != X86_EAX) 
3790                                 x86_push_reg (code, X86_EAX);
3791
3792                         EMIT_FPCOMPARE(code);
3793                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3794                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3795                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3796                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3797
3798                         if (ins->dreg != X86_EAX) 
3799                                 x86_pop_reg (code, X86_EAX);
3800                         break;
3801                 case OP_FCLT:
3802                 case OP_FCLT_UN:
3803                         if (cfg->opt & MONO_OPT_FCMOV) {
3804                                 /* zeroing the register at the start results in 
3805                                  * shorter and faster code (we can also remove the widening op)
3806                                  */
3807                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3808                                 x86_fcomip (code, 1);
3809                                 x86_fstp (code, 0);
3810                                 if (ins->opcode == OP_FCLT_UN) {
3811                                         guchar *unordered_check = code;
3812                                         guchar *jump_to_end;
3813                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3814                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3815                                         jump_to_end = code;
3816                                         x86_jump8 (code, 0);
3817                                         x86_patch (unordered_check, code);
3818                                         x86_inc_reg (code, ins->dreg);
3819                                         x86_patch (jump_to_end, code);
3820                                 } else {
3821                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3822                                 }
3823                                 break;
3824                         }
3825                         if (ins->dreg != X86_EAX) 
3826                                 x86_push_reg (code, X86_EAX);
3827
3828                         EMIT_FPCOMPARE(code);
3829                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3830                         if (ins->opcode == OP_FCLT_UN) {
3831                                 guchar *is_not_zero_check, *end_jump;
3832                                 is_not_zero_check = code;
3833                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3834                                 end_jump = code;
3835                                 x86_jump8 (code, 0);
3836                                 x86_patch (is_not_zero_check, code);
3837                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3838
3839                                 x86_patch (end_jump, code);
3840                         }
3841                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3842                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3843
3844                         if (ins->dreg != X86_EAX) 
3845                                 x86_pop_reg (code, X86_EAX);
3846                         break;
3847                 case OP_FCGT:
3848                 case OP_FCGT_UN:
3849                         if (cfg->opt & MONO_OPT_FCMOV) {
3850                                 /* zeroing the register at the start results in 
3851                                  * shorter and faster code (we can also remove the widening op)
3852                                  */
3853                                 guchar *unordered_check;
3854                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3855                                 x86_fcomip (code, 1);
3856                                 x86_fstp (code, 0);
3857                                 if (ins->opcode == OP_FCGT) {
3858                                         unordered_check = code;
3859                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3860                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3861                                         x86_patch (unordered_check, code);
3862                                 } else {
3863                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3864                                 }
3865                                 break;
3866                         }
3867                         if (ins->dreg != X86_EAX) 
3868                                 x86_push_reg (code, X86_EAX);
3869
3870                         EMIT_FPCOMPARE(code);
3871                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3872                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3873                         if (ins->opcode == OP_FCGT_UN) {
3874                                 guchar *is_not_zero_check, *end_jump;
3875                                 is_not_zero_check = code;
3876                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3877                                 end_jump = code;
3878                                 x86_jump8 (code, 0);
3879                                 x86_patch (is_not_zero_check, code);
3880                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3881         
3882                                 x86_patch (end_jump, code);
3883                         }
3884                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3885                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3886
3887                         if (ins->dreg != X86_EAX) 
3888                                 x86_pop_reg (code, X86_EAX);
3889                         break;
3890                 case OP_FBEQ:
3891                         if (cfg->opt & MONO_OPT_FCMOV) {
3892                                 guchar *jump = code;
3893                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3894                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3895                                 x86_patch (jump, code);
3896                                 break;
3897                         }
3898                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3899                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3900                         break;
3901                 case OP_FBNE_UN:
3902                         /* Branch if C013 != 100 */
3903                         if (cfg->opt & MONO_OPT_FCMOV) {
3904                                 /* branch if !ZF or (PF|CF) */
3905                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3906                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3907                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3908                                 break;
3909                         }
3910                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3911                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3912                         break;
3913                 case OP_FBLT:
3914                         if (cfg->opt & MONO_OPT_FCMOV) {
3915                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3916                                 break;
3917                         }
3918                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3919                         break;
3920                 case OP_FBLT_UN:
3921                         if (cfg->opt & MONO_OPT_FCMOV) {
3922                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3923                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3924                                 break;
3925                         }
3926                         if (ins->opcode == OP_FBLT_UN) {
3927                                 guchar *is_not_zero_check, *end_jump;
3928                                 is_not_zero_check = code;
3929                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3930                                 end_jump = code;
3931                                 x86_jump8 (code, 0);
3932                                 x86_patch (is_not_zero_check, code);
3933                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3934
3935                                 x86_patch (end_jump, code);
3936                         }
3937                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3938                         break;
3939                 case OP_FBGT:
3940                 case OP_FBGT_UN:
3941                         if (cfg->opt & MONO_OPT_FCMOV) {
3942                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3943                                 break;
3944                         }
3945                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3946                         if (ins->opcode == OP_FBGT_UN) {
3947                                 guchar *is_not_zero_check, *end_jump;
3948                                 is_not_zero_check = code;
3949                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3950                                 end_jump = code;
3951                                 x86_jump8 (code, 0);
3952                                 x86_patch (is_not_zero_check, code);
3953                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3954
3955                                 x86_patch (end_jump, code);
3956                         }
3957                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3958                         break;
3959                 case OP_FBGE:
3960                         /* Branch if C013 == 100 or 001 */
3961                         if (cfg->opt & MONO_OPT_FCMOV) {
3962                                 guchar *br1;
3963
3964                                 /* skip branch if C1=1 */
3965                                 br1 = code;
3966                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3967                                 /* branch if (C0 | C3) = 1 */
3968                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3969                                 x86_patch (br1, code);
3970                                 break;
3971                         }
3972                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3973                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3974                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3975                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3976                         break;
3977                 case OP_FBGE_UN:
3978                         /* Branch if C013 == 000 */
3979                         if (cfg->opt & MONO_OPT_FCMOV) {
3980                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3981                                 break;
3982                         }
3983                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3984                         break;
3985                 case OP_FBLE:
3986                         /* Branch if C013=000 or 100 */
3987                         if (cfg->opt & MONO_OPT_FCMOV) {
3988                                 guchar *br1;
3989
3990                                 /* skip branch if C1=1 */
3991                                 br1 = code;
3992                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3993                                 /* branch if C0=0 */
3994                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3995                                 x86_patch (br1, code);
3996                                 break;
3997                         }
3998                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3999                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
4000                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4001                         break;
4002                 case OP_FBLE_UN:
4003                         /* Branch if C013 != 001 */
4004                         if (cfg->opt & MONO_OPT_FCMOV) {
4005                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4006                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
4007                                 break;
4008                         }
4009                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
4010                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4011                         break;
4012                 case CEE_CKFINITE: {
4013                         x86_push_reg (code, X86_EAX);
4014                         x86_fxam (code);
4015                         x86_fnstsw (code);
4016                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
4017                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
4018                         x86_pop_reg (code, X86_EAX);
4019                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
4020                         break;
4021                 }
4022                 case OP_TLS_GET: {
4023                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
4024                         break;
4025                 }
4026                 case OP_ATOMIC_ADD_I4: {
4027                         int dreg = ins->dreg;
4028
4029                         if (dreg == ins->inst_basereg) {
4030                                 x86_push_reg (code, ins->sreg2);
4031                                 dreg = ins->sreg2;
4032                         } 
4033                         
4034                         if (dreg != ins->sreg2)
4035                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
4036
4037                         x86_prefix (code, X86_LOCK_PREFIX);
4038                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4039
4040                         if (dreg != ins->dreg) {
4041                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4042                                 x86_pop_reg (code, dreg);
4043                         }
4044
4045                         break;
4046                 }
4047                 case OP_ATOMIC_ADD_NEW_I4: {
4048                         int dreg = ins->dreg;
4049
4050                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
4051                         if (ins->sreg2 == dreg) {
4052                                 if (dreg == X86_EBX) {
4053                                         dreg = X86_EDI;
4054                                         if (ins->inst_basereg == X86_EDI)
4055                                                 dreg = X86_ESI;
4056                                 } else {
4057                                         dreg = X86_EBX;
4058                                         if (ins->inst_basereg == X86_EBX)
4059                                                 dreg = X86_EDI;
4060                                 }
4061                         } else if (ins->inst_basereg == dreg) {
4062                                 if (dreg == X86_EBX) {
4063                                         dreg = X86_EDI;
4064                                         if (ins->sreg2 == X86_EDI)
4065                                                 dreg = X86_ESI;
4066                                 } else {
4067                                         dreg = X86_EBX;
4068                                         if (ins->sreg2 == X86_EBX)
4069                                                 dreg = X86_EDI;
4070                                 }
4071                         }
4072
4073                         if (dreg != ins->dreg) {
4074                                 x86_push_reg (code, dreg);
4075                         }
4076
4077                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
4078                         x86_prefix (code, X86_LOCK_PREFIX);
4079                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4080                         /* dreg contains the old value, add with sreg2 value */
4081                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
4082                         
4083                         if (ins->dreg != dreg) {
4084                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4085                                 x86_pop_reg (code, dreg);
4086                         }
4087
4088                         break;
4089                 }
4090                 case OP_ATOMIC_EXCHANGE_I4: {
4091                         guchar *br[2];
4092                         int sreg2 = ins->sreg2;
4093                         int breg = ins->inst_basereg;
4094
4095                         /* cmpxchg uses eax as comperand, need to make sure we can use it
4096                          * hack to overcome limits in x86 reg allocator 
4097                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
4098                          */
4099                         if (ins->dreg != X86_EAX)
4100                                 x86_push_reg (code, X86_EAX);
4101                         
4102                         /* We need the EAX reg for the cmpxchg */
4103                         if (ins->sreg2 == X86_EAX) {
4104                                 x86_push_reg (code, X86_EDX);
4105                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
4106                                 sreg2 = X86_EDX;
4107                         }
4108
4109                         if (breg == X86_EAX) {
4110                                 x86_push_reg (code, X86_ESI);
4111                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
4112                                 breg = X86_ESI;
4113                         }
4114
4115                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
4116
4117                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
4118                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
4119                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
4120                         x86_patch (br [1], br [0]);
4121
4122                         if (breg != ins->inst_basereg)
4123                                 x86_pop_reg (code, X86_ESI);
4124
4125                         if (ins->dreg != X86_EAX) {
4126                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
4127                                 x86_pop_reg (code, X86_EAX);
4128                         }
4129
4130                         if (ins->sreg2 != sreg2)
4131                                 x86_pop_reg (code, X86_EDX);
4132
4133                         break;
4134                 }
4135                 default:
4136                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
4137                         g_assert_not_reached ();
4138                 }
4139
4140                 if ((code - cfg->native_code - offset) > max_len) {
4141                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4142                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4143                         g_assert_not_reached ();
4144                 }
4145                
4146                 cpos += max_len;
4147
4148                 last_ins = ins;
4149                 last_offset = offset;
4150                 
4151                 ins = ins->next;
4152         }
4153
4154         cfg->code_len = code - cfg->native_code;
4155 }
4156
4157 void
4158 mono_arch_register_lowlevel_calls (void)
4159 {
4160 }
4161
4162 void
4163 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4164 {
4165         MonoJumpInfo *patch_info;
4166         gboolean compile_aot = !run_cctors;
4167
4168         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4169                 unsigned char *ip = patch_info->ip.i + code;
4170                 const unsigned char *target;
4171
4172                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4173
4174                 if (compile_aot) {
4175                         switch (patch_info->type) {
4176                         case MONO_PATCH_INFO_BB:
4177                         case MONO_PATCH_INFO_LABEL:
4178                                 break;
4179                         default:
4180                                 /* No need to patch these */
4181                                 continue;
4182                         }
4183                 }
4184
4185                 switch (patch_info->type) {
4186                 case MONO_PATCH_INFO_IP:
4187                         *((gconstpointer *)(ip)) = target;
4188                         break;
4189                 case MONO_PATCH_INFO_CLASS_INIT: {
4190                         guint8 *code = ip;
4191                         /* Might already been changed to a nop */
4192                         x86_call_code (code, 0);
4193                         x86_patch (ip, target);
4194                         break;
4195                 }
4196                 case MONO_PATCH_INFO_ABS:
4197                 case MONO_PATCH_INFO_METHOD:
4198                 case MONO_PATCH_INFO_METHOD_JUMP:
4199                 case MONO_PATCH_INFO_INTERNAL_METHOD:
4200                 case MONO_PATCH_INFO_BB:
4201                 case MONO_PATCH_INFO_LABEL:
4202                         x86_patch (ip, target);
4203                         break;
4204                 case MONO_PATCH_INFO_NONE:
4205                         break;
4206                 default: {
4207                         guint32 offset = mono_arch_get_patch_offset (ip);
4208                         *((gconstpointer *)(ip + offset)) = target;
4209                         break;
4210                 }
4211                 }
4212         }
4213 }
4214
4215 guint8 *
4216 mono_arch_emit_prolog (MonoCompile *cfg)
4217 {
4218         MonoMethod *method = cfg->method;
4219         MonoBasicBlock *bb;
4220         MonoMethodSignature *sig;
4221         MonoInst *inst;
4222         int alloc_size, pos, max_offset, i;
4223         guint8 *code;
4224
4225         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
4226         code = cfg->native_code = g_malloc (cfg->code_size);
4227
4228         x86_push_reg (code, X86_EBP);
4229         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
4230
4231         alloc_size = - cfg->stack_offset;
4232         pos = 0;
4233
4234         if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
4235                 /* Might need to attach the thread to the JIT */
4236                 if (lmf_tls_offset != -1) {
4237                         guint8 *buf;
4238
4239                         code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
4240 #ifdef PLATFORM_WIN32
4241                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4242                         /* FIXME: Add a separate key for LMF to avoid this */
4243                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4244 #endif
4245                         x86_test_reg_reg (code, X86_EAX, X86_EAX);
4246                         buf = code;
4247                         x86_branch8 (code, X86_CC_NE, 0, 0);
4248                         x86_push_imm (code, cfg->domain);
4249                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4250                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4251                         x86_patch (buf, code);
4252                 }
4253                 else {
4254                         g_assert (!cfg->compile_aot);
4255                         x86_push_imm (code, cfg->domain);
4256                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4257                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4258                 }
4259         }
4260
4261         if (method->save_lmf) {
4262                 pos += sizeof (MonoLMF);
4263
4264                 /* save the current IP */
4265                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
4266                 x86_push_imm_template (code);
4267
4268                 /* save all caller saved regs */
4269                 x86_push_reg (code, X86_EBP);
4270                 x86_push_reg (code, X86_ESI);
4271                 x86_push_reg (code, X86_EDI);
4272                 x86_push_reg (code, X86_EBX);
4273
4274                 /* save method info */
4275                 x86_push_imm (code, method);
4276
4277                 /* get the address of lmf for the current thread */
4278                 /* 
4279                  * This is performance critical so we try to use some tricks to make
4280                  * it fast.
4281                  */
4282                 if (lmf_tls_offset != -1) {
4283                         /* Load lmf quicky using the GS register */
4284                         code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
4285 #ifdef PLATFORM_WIN32
4286                         /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4287                         /* FIXME: Add a separate key for LMF to avoid this */
4288                         x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4289 #endif
4290                 }
4291                 else {
4292                         if (cfg->compile_aot) {
4293                                 /* The GOT var does not exist yet */
4294                                 x86_call_imm (code, 0);
4295                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
4296                                 x86_pop_reg (code, X86_EAX);
4297                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
4298                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
4299                                 x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
4300                         }
4301                         else
4302                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
4303                 }
4304
4305                 /* push lmf */
4306                 x86_push_reg (code, X86_EAX); 
4307                 /* push *lfm (previous_lmf) */
4308                 x86_push_membase (code, X86_EAX, 0);
4309                 /* *(lmf) = ESP */
4310                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
4311         } else {
4312
4313                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4314                         x86_push_reg (code, X86_EBX);
4315                         pos += 4;
4316                 }
4317
4318                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4319                         x86_push_reg (code, X86_EDI);
4320                         pos += 4;
4321                 }
4322
4323                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4324                         x86_push_reg (code, X86_ESI);
4325                         pos += 4;
4326                 }
4327         }
4328
4329         alloc_size -= pos;
4330
4331         if (alloc_size) {
4332                 /* See mono_emit_stack_alloc */
4333 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
4334                 guint32 remaining_size = alloc_size;
4335                 while (remaining_size >= 0x1000) {
4336                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
4337                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
4338                         remaining_size -= 0x1000;
4339                 }
4340                 if (remaining_size)
4341                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
4342 #else
4343                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
4344 #endif
4345         }
4346
4347         /* compute max_offset in order to use short forward jumps */
4348         max_offset = 0;
4349         if (cfg->opt & MONO_OPT_BRANCH) {
4350                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4351                         MonoInst *ins = bb->code;
4352                         bb->max_offset = max_offset;
4353
4354                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4355                                 max_offset += 6;
4356                         /* max alignment for loops */
4357                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4358                                 max_offset += LOOP_ALIGNMENT;
4359
4360                         while (ins) {
4361                                 if (ins->opcode == OP_LABEL)
4362                                         ins->inst_c1 = max_offset;
4363                                 
4364                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
4365                                 ins = ins->next;
4366                         }
4367                 }
4368         }
4369
4370         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4371                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4372
4373         /* load arguments allocated to register from the stack */
4374         sig = mono_method_signature (method);
4375         pos = 0;
4376
4377         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4378                 inst = cfg->varinfo [pos];
4379                 if (inst->opcode == OP_REGVAR) {
4380                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
4381                         if (cfg->verbose_level > 2)
4382                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
4383                 }
4384                 pos++;
4385         }
4386
4387         cfg->code_len = code - cfg->native_code;
4388
4389         return code;
4390 }
4391
4392 void
4393 mono_arch_emit_epilog (MonoCompile *cfg)
4394 {
4395         MonoMethod *method = cfg->method;
4396         MonoMethodSignature *sig = mono_method_signature (method);
4397         int quad, pos;
4398         guint32 stack_to_pop;
4399         guint8 *code;
4400         int max_epilog_size = 16;
4401         CallInfo *cinfo;
4402         
4403         if (cfg->method->save_lmf)
4404                 max_epilog_size += 128;
4405         
4406         if (mono_jit_trace_calls != NULL)
4407                 max_epilog_size += 50;
4408
4409         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
4410                 cfg->code_size *= 2;
4411                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4412                 mono_jit_stats.code_reallocs++;
4413         }
4414
4415         code = cfg->native_code + cfg->code_len;
4416
4417         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4418                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4419
4420         /* the code restoring the registers must be kept in sync with CEE_JMP */
4421         pos = 0;
4422         
4423         if (method->save_lmf) {
4424                 gint32 prev_lmf_reg;
4425
4426                 /* Find a spare register */
4427                 switch (sig->ret->type) {
4428                 case MONO_TYPE_I8:
4429                 case MONO_TYPE_U8:
4430                         prev_lmf_reg = X86_EDI;
4431                         cfg->used_int_regs |= (1 << X86_EDI);
4432                         break;
4433                 default:
4434                         prev_lmf_reg = X86_EDX;
4435                         break;
4436                 }
4437
4438                 /* reg = previous_lmf */
4439                 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);
4440
4441                 /* ecx = lmf */
4442                 x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);
4443
4444                 /* *(lmf) = previous_lmf */
4445                 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
4446
4447                 /* restore caller saved regs */
4448                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4449                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
4450                 }
4451
4452                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4453                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
4454                 }
4455                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4456                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
4457                 }
4458
4459                 /* EBP is restored by LEAVE */
4460         } else {
4461                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4462                         pos -= 4;
4463                 }
4464                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4465                         pos -= 4;
4466                 }
4467                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4468                         pos -= 4;
4469                 }
4470
4471                 if (pos)
4472                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
4473
4474                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4475                         x86_pop_reg (code, X86_ESI);
4476                 }
4477                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4478                         x86_pop_reg (code, X86_EDI);
4479                 }
4480                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4481                         x86_pop_reg (code, X86_EBX);
4482                 }
4483         }
4484
4485         /* Load returned vtypes into registers if needed */
4486         cinfo = get_call_info (sig, FALSE);
4487         if (cinfo->ret.storage == ArgValuetypeInReg) {
4488                 for (quad = 0; quad < 2; quad ++) {
4489                         switch (cinfo->ret.pair_storage [quad]) {
4490                         case ArgInIReg:
4491                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
4492                                 break;
4493                         case ArgOnFloatFpStack:
4494                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
4495                                 break;
4496                         case ArgOnDoubleFpStack:
4497                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
4498                                 break;
4499                         case ArgNone:
4500                                 break;
4501                         default:
4502                                 g_assert_not_reached ();
4503                         }
4504                 }
4505         }
4506
4507         x86_leave (code);
4508
4509         if (CALLCONV_IS_STDCALL (sig)) {
4510                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
4511
4512                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
4513         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
4514                 stack_to_pop = 4;
4515         else
4516                 stack_to_pop = 0;
4517
4518         if (stack_to_pop)
4519                 x86_ret_imm (code, stack_to_pop);
4520         else
4521                 x86_ret (code);
4522
4523         g_free (cinfo);
4524
4525         cfg->code_len = code - cfg->native_code;
4526
4527         g_assert (cfg->code_len < cfg->code_size);
4528 }
4529
4530 void
4531 mono_arch_emit_exceptions (MonoCompile *cfg)
4532 {
4533         MonoJumpInfo *patch_info;
4534         int nthrows, i;
4535         guint8 *code;
4536         MonoClass *exc_classes [16];
4537         guint8 *exc_throw_start [16], *exc_throw_end [16];
4538         guint32 code_size;
4539         int exc_count = 0;
4540
4541         /* Compute needed space */
4542         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4543                 if (patch_info->type == MONO_PATCH_INFO_EXC)
4544                         exc_count++;
4545         }
4546
4547         /* 
4548          * make sure we have enough space for exceptions
4549          * 16 is the size of two push_imm instructions and a call
4550          */
4551         if (cfg->compile_aot)
4552                 code_size = exc_count * 32;
4553         else
4554                 code_size = exc_count * 16;
4555
4556         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4557                 cfg->code_size *= 2;
4558                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4559                 mono_jit_stats.code_reallocs++;
4560         }
4561
4562         code = cfg->native_code + cfg->code_len;
4563
4564         nthrows = 0;
4565         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4566                 switch (patch_info->type) {
4567                 case MONO_PATCH_INFO_EXC: {
4568                         MonoClass *exc_class;
4569                         guint8 *buf, *buf2;
4570                         guint32 throw_ip;
4571
4572                         x86_patch (patch_info->ip.i + cfg->native_code, code);
4573
4574                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4575                         g_assert (exc_class);
4576                         throw_ip = patch_info->ip.i;
4577
4578                         /* Find a throw sequence for the same exception class */
4579                         for (i = 0; i < nthrows; ++i)
4580                                 if (exc_classes [i] == exc_class)
4581                                         break;
4582                         if (i < nthrows) {
4583                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4584                                 x86_jump_code (code, exc_throw_start [i]);
4585                                 patch_info->type = MONO_PATCH_INFO_NONE;
4586                         }
4587                         else {
4588                                 guint32 got_reg = X86_EAX;
4589                                 guint32 size;
4590
4591                                 /* Compute size of code following the push <OFFSET> */
4592                                 if (cfg->compile_aot) {
4593                                         size = 5 + 6;
4594                                         if (!cfg->got_var)
4595                                                 size += 32;
4596                                         else if (cfg->got_var->opcode == OP_REGOFFSET)
4597                                                 size += 6;
4598                                 }
4599                                 else
4600                                         size = 5 + 5;
4601
4602                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
4603                                         /* Use the shorter form */
4604                                         buf = buf2 = code;
4605                                         x86_push_imm (code, 0);
4606                                 }
4607                                 else {
4608                                         buf = code;
4609                                         x86_push_imm (code, 0xf0f0f0f0);
4610                                         buf2 = code;
4611                                 }
4612
4613                                 if (nthrows < 16) {
4614                                         exc_classes [nthrows] = exc_class;
4615                                         exc_throw_start [nthrows] = code;
4616                                 }
4617
4618                                 if (cfg->compile_aot) {          
4619                                         /*
4620                                          * Since the patches are generated by the back end, there is                                     * no way to generate a got_var at this point.   
4621                                          */
4622                                         if (!cfg->got_var) {
4623                                                 x86_call_imm (code, 0);
4624                                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
4625                                                 x86_pop_reg (code, X86_EAX);
4626                                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
4627                                         }
4628                                         else {
4629                                                 if (cfg->got_var->opcode == OP_REGOFFSET)
4630                                                         x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
4631                                                 else
4632                                                         got_reg = cfg->got_var->dreg;
4633                                         }
4634                                 }
4635
4636                                 x86_push_imm (code, exc_class->type_token);
4637                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
4638                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4639                                 patch_info->ip.i = code - cfg->native_code;
4640                                 if (cfg->compile_aot)
4641                                         x86_call_membase (code, got_reg, 0xf0f0f0f0);
4642                                 else
4643                                         x86_call_code (code, 0);
4644                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
4645                                 while (buf < buf2)
4646                                         x86_nop (buf);
4647
4648                                 if (nthrows < 16) {
4649                                         exc_throw_end [nthrows] = code;
4650                                         nthrows ++;
4651                                 }
4652                         }
4653                         break;
4654                 }
4655                 default:
4656                         /* do nothing */
4657                         break;
4658                 }
4659         }
4660
4661         cfg->code_len = code - cfg->native_code;
4662
4663         g_assert (cfg->code_len < cfg->code_size);
4664 }
4665
4666 void
4667 mono_arch_flush_icache (guint8 *code, gint size)
4668 {
4669         /* not needed */
4670 }
4671
4672 void
4673 mono_arch_flush_register_windows (void)
4674 {
4675 }
4676
4677 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4678
4679 static void
4680 setup_stack (MonoJitTlsData *tls)
4681 {
4682         pthread_t self = pthread_self();
4683         pthread_attr_t attr;
4684         size_t stsize = 0;
4685         struct sigaltstack sa;
4686         guint8 *staddr = NULL;
4687         guint8 *current = (guint8*)&staddr;
4688
4689         if (mono_running_on_valgrind ())
4690                 return;
4691
4692         /* Determine stack boundaries */
4693         pthread_attr_init( &attr );
4694 #ifdef HAVE_PTHREAD_GETATTR_NP
4695         pthread_getattr_np( self, &attr );
4696 #else
4697 #ifdef HAVE_PTHREAD_ATTR_GET_NP
4698         pthread_attr_get_np( self, &attr );
4699 #elif defined(sun)
4700         pthread_attr_getstacksize( &attr, &stsize );
4701 #else
4702 #error "Not implemented"
4703 #endif
4704 #endif
4705 #ifndef sun
4706         pthread_attr_getstack( &attr, (void**)&staddr, &stsize );
4707 #endif
4708
4709         g_assert (staddr);
4710
4711         g_assert ((current > staddr) && (current < staddr + stsize));
4712
4713         tls->end_of_stack = staddr + stsize;
4714
4715         /*
4716          * threads created by nptl does not seem to have a guard page, and
4717          * since the main thread is not created by us, we can't even set one.
4718          * Increasing stsize fools the SIGSEGV signal handler into thinking this
4719          * is a stack overflow exception.
4720          */
4721         tls->stack_size = stsize + getpagesize ();
4722
4723         /* Setup an alternate signal stack */
4724         tls->signal_stack = mmap (0, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
4725         tls->signal_stack_size = SIGNAL_STACK_SIZE;
4726
4727         g_assert (tls->signal_stack);
4728
4729         sa.ss_sp = tls->signal_stack;
4730         sa.ss_size = SIGNAL_STACK_SIZE;
4731         sa.ss_flags = SS_ONSTACK;
4732         sigaltstack (&sa, NULL);
4733 }
4734
4735 #endif
4736
4737 /*
4738  * Support for fast access to the thread-local lmf structure using the GS
4739  * segment register on NPTL + kernel 2.6.x.
4740  */
4741
4742 static gboolean tls_offset_inited = FALSE;
4743
4744 void
4745 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4746 {
4747         if (!tls_offset_inited) {
4748                 if (!getenv ("MONO_NO_TLS")) {
4749 #ifdef PLATFORM_WIN32
4750                         /* 
4751                          * We need to init this multiple times, since when we are first called, the key might not
4752                          * be initialized yet.
4753                          */
4754                         appdomain_tls_offset = mono_domain_get_tls_key ();
4755                         lmf_tls_offset = mono_get_jit_tls_key ();
4756                         thread_tls_offset = mono_thread_get_tls_key ();
4757
4758                         /* Only 64 tls entries can be accessed using inline code */
4759                         if (appdomain_tls_offset >= 64)
4760                                 appdomain_tls_offset = -1;
4761                         if (lmf_tls_offset >= 64)
4762                                 lmf_tls_offset = -1;
4763                         if (thread_tls_offset >= 64)
4764                                 thread_tls_offset = -1;
4765 #else
4766                         tls_offset_inited = TRUE;
4767                         appdomain_tls_offset = mono_domain_get_tls_offset ();
4768                         lmf_tls_offset = mono_get_lmf_tls_offset ();
4769                         thread_tls_offset = mono_thread_get_tls_offset ();
4770 #endif
4771                 }
4772         }               
4773
4774 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4775         setup_stack (tls);
4776 #endif
4777 }
4778
4779 void
4780 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4781 {
4782 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4783         struct sigaltstack sa;
4784
4785         sa.ss_sp = tls->signal_stack;
4786         sa.ss_size = SIGNAL_STACK_SIZE;
4787         sa.ss_flags = SS_DISABLE;
4788         sigaltstack  (&sa, NULL);
4789
4790         if (tls->signal_stack)
4791                 munmap (tls->signal_stack, SIGNAL_STACK_SIZE);
4792 #endif
4793 }
4794
4795 void
4796 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4797 {
4798
4799         /* add the this argument */
4800         if (this_reg != -1) {
4801                 MonoInst *this;
4802                 MONO_INST_NEW (cfg, this, OP_OUTARG);
4803                 this->type = this_type;
4804                 this->sreg1 = this_reg;
4805                 mono_bblock_add_inst (cfg->cbb, this);
4806         }
4807
4808         if (vt_reg != -1) {
4809                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
4810                 MonoInst *vtarg;
4811
4812                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4813                         /*
4814                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4815                          * the stack. Save the address here, so the call instruction can
4816                          * access it.
4817                          */
4818                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4819                         vtarg->inst_destbasereg = X86_ESP;
4820                         vtarg->inst_offset = inst->stack_usage;
4821                         vtarg->sreg1 = vt_reg;
4822                         mono_bblock_add_inst (cfg->cbb, vtarg);
4823                 }
4824                 else {
4825                         MonoInst *vtarg;
4826                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4827                         vtarg->type = STACK_MP;
4828                         vtarg->sreg1 = vt_reg;
4829                         mono_bblock_add_inst (cfg->cbb, vtarg);
4830                 }
4831
4832                 g_free (cinfo);
4833         }
4834 }
4835
4836
4837 MonoInst*
4838 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4839 {
4840         MonoInst *ins = NULL;
4841
4842         if (cmethod->klass == mono_defaults.math_class) {
4843                 if (strcmp (cmethod->name, "Sin") == 0) {
4844                         MONO_INST_NEW (cfg, ins, OP_SIN);
4845                         ins->inst_i0 = args [0];
4846                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4847                         MONO_INST_NEW (cfg, ins, OP_COS);
4848                         ins->inst_i0 = args [0];
4849                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4850                         MONO_INST_NEW (cfg, ins, OP_TAN);
4851                         ins->inst_i0 = args [0];
4852                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4853                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4854                         ins->inst_i0 = args [0];
4855                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4856                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4857                         ins->inst_i0 = args [0];
4858                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4859                         MONO_INST_NEW (cfg, ins, OP_ABS);
4860                         ins->inst_i0 = args [0];
4861                 }
4862 #if 0
4863                 /* OP_FREM is not IEEE compatible */
4864                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4865                         MONO_INST_NEW (cfg, ins, OP_FREM);
4866                         ins->inst_i0 = args [0];
4867                         ins->inst_i1 = args [1];
4868                 }
4869 #endif
4870         } else if(cmethod->klass->image == mono_defaults.corlib &&
4871                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4872                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4873
4874                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4875                         MonoInst *ins_iconst;
4876
4877                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4878                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4879                         ins_iconst->inst_c0 = 1;
4880
4881                         ins->inst_i0 = args [0];
4882                         ins->inst_i1 = ins_iconst;
4883                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4884                         MonoInst *ins_iconst;
4885
4886                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4887                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4888                         ins_iconst->inst_c0 = -1;
4889
4890                         ins->inst_i0 = args [0];
4891                         ins->inst_i1 = ins_iconst;
4892                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4893                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4894
4895                         ins->inst_i0 = args [0];
4896                         ins->inst_i1 = args [1];
4897                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4898                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
4899
4900                         ins->inst_i0 = args [0];
4901                         ins->inst_i1 = args [1];
4902                 }
4903         }
4904
4905         return ins;
4906 }
4907
4908
4909 gboolean
4910 mono_arch_print_tree (MonoInst *tree, int arity)
4911 {
4912         return 0;
4913 }
4914
4915 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4916 {
4917         MonoInst* ins;
4918         
4919         if (appdomain_tls_offset == -1)
4920                 return NULL;
4921
4922         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4923         ins->inst_offset = appdomain_tls_offset;
4924         return ins;
4925 }
4926
4927 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4928 {
4929         MonoInst* ins;
4930
4931         if (thread_tls_offset == -1)
4932                 return NULL;
4933
4934         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4935         ins->inst_offset = thread_tls_offset;
4936         return ins;
4937 }
4938
4939 guint32
4940 mono_arch_get_patch_offset (guint8 *code)
4941 {
4942         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4943                 return 2;
4944         else if ((code [0] == 0xba))
4945                 return 1;
4946         else if ((code [0] == 0x68))
4947                 /* push IMM */
4948                 return 1;
4949         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4950                 /* push <OFFSET>(<REG>) */
4951                 return 2;
4952         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4953                 /* call *<OFFSET>(<REG>) */
4954                 return 2;
4955         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4956                 /* fldl <ADDR> */
4957                 return 2;
4958         else if ((code [0] == 0x58) && (code [1] == 0x05))
4959                 /* pop %eax; add <OFFSET>, %eax */
4960                 return 2;
4961         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4962                 /* pop <REG>; add <OFFSET>, <REG> */
4963                 return 3;
4964         else {
4965                 g_assert_not_reached ();
4966                 return -1;
4967         }
4968 }
4969
4970 gpointer*
4971 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
4972 {
4973         guint8 reg = 0;
4974         gint32 disp = 0;
4975
4976         /* go to the start of the call instruction
4977          *
4978          * address_byte = (m << 6) | (o << 3) | reg
4979          * call opcode: 0xff address_byte displacement
4980          * 0xff m=1,o=2 imm8
4981          * 0xff m=2,o=2 imm32
4982          */
4983         code -= 6;
4984         if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
4985                 reg = code [4] & 0x07;
4986                 disp = (signed char)code [5];
4987         } else {
4988                 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
4989                         reg = code [1] & 0x07;
4990                         disp = *((gint32*)(code + 2));
4991                 } else if ((code [1] == 0xe8)) {
4992                         return NULL;
4993                 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
4994                         /*
4995                          * This is a interface call: should check the above code can't catch it earlier 
4996                          * 8b 40 30   mov    0x30(%eax),%eax
4997                          * ff 10      call   *(%eax)
4998                          */
4999                         disp = 0;
5000                         reg = code [5] & 0x07;
5001                 }
5002                 else
5003                         return NULL;
5004         }
5005
5006         return (gpointer*)(((gint32)(regs [reg])) + disp);
5007 }
5008
5009 gpointer* 
5010 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
5011 {
5012         guint8 reg = 0;
5013         gint32 disp = 0;
5014
5015         code -= 7;
5016         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
5017                 reg = x86_modrm_rm (code [1]);
5018                 disp = code [4];
5019
5020                 if (reg == X86_EAX)
5021                         return NULL;
5022                 else
5023                         return (gpointer*)(((gint32)(regs [reg])) + disp);
5024         }
5025
5026         return NULL;
5027 }