copying the latest Sys.Web.Services from trunk.
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #ifndef PLATFORM_WIN32
16 #include <unistd.h>
17 #include <sys/mman.h>
18 #endif
19
20 #include <mono/metadata/appdomain.h>
21 #include <mono/metadata/debug-helpers.h>
22 #include <mono/metadata/threads.h>
23 #include <mono/metadata/profiler-private.h>
24 #include <mono/utils/mono-math.h>
25
26 #include "trace.h"
27 #include "mini-x86.h"
28 #include "inssel.h"
29 #include "cpu-pentium.h"
30
31 /* On windows, these hold the key returned by TlsAlloc () */
32 static gint lmf_tls_offset = -1;
33 static gint appdomain_tls_offset = -1;
34 static gint thread_tls_offset = -1;
35
36 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
37
38 #ifdef PLATFORM_WIN32
39 /* Under windows, the default pinvoke calling convention is stdcall */
40 #define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
41 #else
42 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
43 #endif
44
45 #define SIGNAL_STACK_SIZE (64 * 1024)
46
47 #define NOT_IMPLEMENTED g_assert_not_reached ()
48
49 const char*
50 mono_arch_regname (int reg) {
51         switch (reg) {
52         case X86_EAX: return "%eax";
53         case X86_EBX: return "%ebx";
54         case X86_ECX: return "%ecx";
55         case X86_EDX: return "%edx";
56         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
57         case X86_EDI: return "%edi";
58         case X86_ESI: return "%esi";
59         }
60         return "unknown";
61 }
62
/* Where a call argument or return value lives. */
typedef enum {
	ArgInIReg,		/* in an integer register */
	ArgInFloatSSEReg,	/* in an SSE register, as a float */
	ArgInDoubleSSEReg,	/* in an SSE register, as a double */
	ArgOnStack,		/* on the stack */
	ArgValuetypeInReg,	/* valuetype split over the locations in pair_storage */
	ArgOnFloatFpStack,	/* on the x87 fp stack, as a float */
	ArgOnDoubleFpStack,	/* on the x87 fp stack, as a double */
	ArgNone			/* no value (e.g. void return) */
} ArgStorage;
73
/* Location of a single argument or return value. */
typedef struct {
	gint16 offset;		/* offset into the outgoing argument area (set by the add_* helpers) */
	gint8  reg;		/* register number, meaningful for the in-register storages */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;
83
/* Calling-convention layout for a whole signature, built by get_call_info (). */
typedef struct {
	int nargs;
	guint32 stack_usage;	/* total stack space consumed by the arguments */
	guint32 reg_usage;	/* number of integer param registers used */
	guint32 freg_usage;	/* number of float param registers used */
	gboolean need_stack_align;
	ArgInfo ret;		/* location of the return value */
	ArgInfo sig_cookie;	/* location of the vararg signature cookie */
	ArgInfo args [1];	/* variable length: hasthis + param_count entries */
} CallInfo;
94
95 #define PARAM_REGS 0
96
97 #define FLOAT_PARAM_REGS 0
98
99 static X86_Reg_No param_regs [] = { 0 };
100
101 #ifdef PLATFORM_WIN32
102 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
103 #endif
104
105 static void inline
106 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
107 {
108     ainfo->offset = *stack_size;
109
110     if (*gr >= PARAM_REGS) {
111                 ainfo->storage = ArgOnStack;
112                 (*stack_size) += sizeof (gpointer);
113     }
114     else {
115                 ainfo->storage = ArgInIReg;
116                 ainfo->reg = param_regs [*gr];
117                 (*gr) ++;
118     }
119 }
120
121 static void inline
122 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
123 {
124         ainfo->offset = *stack_size;
125
126         g_assert (PARAM_REGS == 0);
127         
128         ainfo->storage = ArgOnStack;
129         (*stack_size) += sizeof (gpointer) * 2;
130 }
131
132 static void inline
133 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
134 {
135     ainfo->offset = *stack_size;
136
137     if (*gr >= FLOAT_PARAM_REGS) {
138                 ainfo->storage = ArgOnStack;
139                 (*stack_size) += sizeof (gpointer);
140     }
141     else {
142                 /* A double register */
143                 if (is_double)
144                         ainfo->storage = ArgInDoubleSSEReg;
145                 else
146                         ainfo->storage = ArgInFloatSSEReg;
147                 ainfo->reg = *gr;
148                 (*gr) += 1;
149     }
150 }
151
152
/*
 * add_valuetype:
 *
 *   Compute the location of a valuetype argument or return value of type
 * TYPE and store it into AINFO.  GR, FR and STACK_SIZE track the integer
 * registers, float registers and stack space consumed so far.  On win32,
 * small pinvoke return values are returned in registers or on the fp
 * stack; everything else is passed on the stack.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* pinvoke signatures use the native (marshalled) size of the type */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* structs of size 1, 2, 4 or 8 come back in EAX (plus EDX for size 8) */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* default: the valuetype is passed on the stack, pointer-aligned */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
208
209 /*
210  * get_call_info:
211  *
212  *  Obtain information about a call according to the calling convention.
213  * For x86 ELF, see the "System V Application Binary Interface Intel386 
214  * Architecture Processor Supplement, Fourth Edition" document for more
215  * information.
216  * For x86 win32, see ???.
217  */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	/* the args[] array in CallInfo has room for one entry; allocate the rest */
	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mono_type_get_underlying_type (sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			/* integer-sized values come back in EAX */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			/* 64 bit values come back in EAX:EDX; only EAX is recorded here */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_VALUETYPE: {
			/* probe with throwaway counters: a valuetype return must not
			 * consume real argument registers */
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			;
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	/* vararg call with no explicit params: the cookie is the only argument */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/* 
			 * Prevent implicit arguments + the sig cookie from being passed 
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		/* byref args are passed as a pointer, whatever the pointee type */
		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_VALUETYPE:
			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	/* vararg call where the sentinel comes after all explicit params:
	 * the cookie was not emitted inside the loop, so emit it here */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
385
386 /*
387  * mono_arch_get_argument_info:
388  * @csig:  a method signature
389  * @param_count: the number of parameters to consider
390  * @arg_info: an array to store the result infos
391  *
392  * Gathers information on parameters such as size, alignment and
393  * padding. arg_info should be large enough to hold param_count + 1 entries. 
394  *
395  * Returns the size of the activation frame.
396  */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, align, pad;
	int offset = 8;		/* skip saved EBP and the return address */
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	/* account for the hidden return-value address argument */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* entry 0 holds the combined size of the implicit arguments */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else
			size = mono_type_stack_size (csig->params [k], &align);

		/* ignore alignment for now */
		align = 1;

		/* with align == 1 this pad is always 0; kept for when alignment is honoured */
		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);	
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* pad the whole frame up to the required frame alignment */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
449
/*
 * Machine code for a small helper with C signature
 *   void (int id, int *p_eax, int *p_ebx, int *p_ecx, int *p_edx)
 * that executes the CPUID instruction with EAX = id and stores the four
 * result registers through the given pointers.  It is copied into
 * executable memory at runtime by cpuid () below.
 */
static const guchar cpuid_impl [] = {
	0x55,                		/* push   %ebp */
	0x89, 0xe5,                	/* mov    %esp,%ebp */
	0x53,                		/* push   %ebx */
	0x8b, 0x45, 0x08,             	/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                	/* cpuid   */
	0x50,                		/* push   %eax */
	0x8b, 0x45, 0x10,             	/* mov    0x10(%ebp),%eax */
	0x89, 0x18,                	/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,             	/* mov    0x14(%ebp),%eax */
	0x89, 0x08,                	/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,             	/* mov    0x18(%ebp),%eax */
	0x89, 0x10,                	/* mov    %edx,(%eax) */
	0x58,                		/* pop    %eax */
	0x8b, 0x55, 0x0c,             	/* mov    0xc(%ebp),%edx */
	0x89, 0x02,                	/* mov    %eax,(%edx) */
	0x5b,                		/* pop    %ebx */
	0xc9,                		/* leave   */
	0xc3,                		/* ret     */
};

/* Signature of the generated cpuid helper above. */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
472
/*
 * cpuid:
 *
 *   Execute the CPUID instruction with EAX = ID and return the resulting
 * register values through P_EAX..P_EDX.  Returns 1 on success, 0 if the
 * CPU does not support CPUID (detected by trying to toggle the ID bit,
 * bit 21, of EFLAGS: it can only be flipped on CPUID-capable CPUs).
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	/* flip EFLAGS bit 21 and check whether the change sticks */
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	/* same EFLAGS bit-21 probe, MSVC inline assembly syntax */
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
532
533 /*
534  * Initialize the cpu to execute managed code.
535  */
536 void
537 mono_arch_cpu_init (void)
538 {
539         /* spec compliance requires running with double precision */
540 #ifndef _MSC_VER
541         guint16 fpcw;
542
543         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
544         fpcw &= ~X86_FPCW_PRECC_MASK;
545         fpcw |= X86_FPCW_PREC_DOUBLE;
546         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
547         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
548 #else
549         _control87 (_PC_64, MCW_PC);
550 #endif
551 }
552
553 /*
554  * This function returns the optimizations supported on this cpu.
555  */
556 guint32
557 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
558 {
559         int eax, ebx, ecx, edx;
560         guint32 opts = 0;
561         
562         *exclude_mask = 0;
563         /* Feature Flags function, flags returned in EDX. */
564         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
565                 if (edx & (1 << 15)) {
566                         opts |= MONO_OPT_CMOV;
567                         if (edx & 1)
568                                 opts |= MONO_OPT_FCMOV;
569                         else
570                                 *exclude_mask |= MONO_OPT_FCMOV;
571                 } else
572                         *exclude_mask |= MONO_OPT_CMOV;
573         }
574         return opts;
575 }
576
/*
 * Determine whenever the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* opcode 0xf7 with modrm reg field 7 and mod 3 is "idiv r32" */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG: fetch the divisor from the context.
		 * Only ECX and EBX are handled; other registers are assumed not
		 * to appear as divisors in JITted code and trip the assert. */
		switch (x86_modrm_rm (ip [1])) {
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		/* a divisor of -1 means the trap was INT_MIN / -1, i.e. an
		 * arithmetic overflow rather than a division by zero */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
613
614 static gboolean
615 is_regsize_var (MonoType *t) {
616         if (t->byref)
617                 return TRUE;
618         switch (mono_type_get_underlying_type (t)->type) {
619         case MONO_TYPE_I4:
620         case MONO_TYPE_U4:
621         case MONO_TYPE_I:
622         case MONO_TYPE_U:
623         case MONO_TYPE_PTR:
624         case MONO_TYPE_FNPTR:
625                 return TRUE;
626         case MONO_TYPE_OBJECT:
627         case MONO_TYPE_STRING:
628         case MONO_TYPE_CLASS:
629         case MONO_TYPE_SZARRAY:
630         case MONO_TYPE_ARRAY:
631                 return TRUE;
632         case MONO_TYPE_VALUETYPE:
633                 return FALSE;
634         }
635         return FALSE;
636 }
637
638 GList *
639 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
640 {
641         GList *vars = NULL;
642         int i;
643
644         for (i = 0; i < cfg->num_varinfo; i++) {
645                 MonoInst *ins = cfg->varinfo [i];
646                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
647
648                 /* unused vars */
649                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
650                         continue;
651
652                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
653                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
654                         continue;
655
656                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
657                  * 8bit quantities in caller saved registers on x86 */
658                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
659                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
660                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
661                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
662                         g_assert (i == vmv->idx);
663                         vars = g_list_prepend (vars, vmv);
664                 }
665         }
666
667         vars = mono_varlist_sort (cfg, vars, 0);
668
669         return vars;
670 }
671
672 GList *
673 mono_arch_get_global_int_regs (MonoCompile *cfg)
674 {
675         GList *regs = NULL;
676
677         /* we can use 3 registers for global allocation */
678         regs = g_list_prepend (regs, (gpointer)X86_EBX);
679         regs = g_list_prepend (regs, (gpointer)X86_ESI);
680         regs = g_list_prepend (regs, (gpointer)X86_EDI);
681
682         return regs;
683 }
684
685 /*
686  * mono_arch_regalloc_cost:
687  *
688  *  Return the cost, in number of memory references, of the action of 
689  * allocating the variable VMV into a register during global register
690  * allocation.
691  */
692 guint32
693 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
694 {
695         MonoInst *ins = cfg->varinfo [vmv->idx];
696
697         if (cfg->method->save_lmf)
698                 /* The register is already saved */
699                 return (ins->opcode == OP_ARG) ? 1 : 0;
700         else
701                 /* push+pop+possible load if it is an argument */
702                 return (ins->opcode == OP_ARG) ? 3 : 2;
703 }
704  
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *m)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset, curinst, size, align;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (m->method);
	sig = mono_method_signature (m->method);

	/* incoming arguments start at EBP + 8 (above saved EBP and return address) */
	offset = 8;
	curinst = 0;

	cinfo = get_call_info (sig, FALSE);

	/* place the return value */
	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* the hidden return-value address is the first stack argument */
		m->ret->opcode = OP_REGOFFSET;
		m->ret->inst_basereg = X86_EBP;
		m->ret->inst_offset = offset;
		offset += sizeof (gpointer);
		break;
	case ArgValuetypeInReg:
		/* handled below, after the local area size is known */
		break;
	case ArgInIReg:
		m->ret->opcode = OP_REGVAR;
		m->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	/* place the implicit 'this' argument */
	if (sig->hasthis) {
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		offset += sizeof (gpointer);
		curinst++;
	}

	/* reserve a slot for the vararg signature cookie */
	if (sig->call_convention == MONO_CALL_VARARG) {
		m->sig_cookie = offset;
		offset += sizeof (gpointer);
	}

	/* place the explicit arguments, each rounded up to 4 bytes */
	for (i = 0; i < sig->param_count; ++i) {
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		size = mono_type_size (sig->params [i], &align);
		size += 4 - 1;
		size &= ~(4 - 1);
		offset += size;
		curinst++;
	}

	/* switch to laying out the area below EBP; offset now counts
	 * downwards and is negated when stored */
	offset = 0;

	/* reserve space to save LMF and caller saved registers */

	if (m->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (m->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		m->ret->opcode = OP_REGOFFSET;
		m->ret->inst_basereg = X86_EBP;
		m->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (m, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* round the running offset up to the locals' alignment */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = m->locals_start; i < m->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = m->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;

	/* round the frame size up to the required frame alignment */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	g_free (cinfo);

	/* change sign? */
	m->stack_offset = -offset;
}
837
838 void
839 mono_arch_create_vars (MonoCompile *cfg)
840 {
841         MonoMethodSignature *sig;
842         CallInfo *cinfo;
843
844         sig = mono_method_signature (cfg->method);
845
846         cinfo = get_call_info (sig, FALSE);
847
848         if (cinfo->ret.storage == ArgValuetypeInReg)
849                 cfg->ret_var_is_local = TRUE;
850
851         g_free (cinfo);
852 }
853
854 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
855  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
856  */
857
858 /* 
859  * take the arguments and generate the arch-specific
860  * instructions to properly call the function in call.
861  * This includes pushing, moving arguments to the right register
862  * etc.
863  * Issue: who does the spilling if needed, and when?
864  */
865 MonoCallInst*
866 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
867         MonoInst *arg, *in;
868         MonoMethodSignature *sig;
869         int i, n, stack_size, type;
870         MonoType *ptype;
871         CallInfo *cinfo;
872
873         stack_size = 0;
874         /* add the vararg cookie before the non-implicit args */
875         if (call->signature->call_convention == MONO_CALL_VARARG) {
876                 MonoInst *sig_arg;
877                 /* FIXME: Add support for signature tokens to AOT */
878                 cfg->disable_aot = TRUE;
879                 MONO_INST_NEW (cfg, arg, OP_OUTARG);
880                 MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
881                 sig_arg->inst_p0 = call->signature;
882                 arg->inst_left = sig_arg;
883                 arg->type = STACK_PTR;
884                 /* prepend, so they get reversed */
885                 arg->next = call->out_args;
886                 call->out_args = arg;
887                 stack_size += sizeof (gpointer);
888         }
889         sig = call->signature;
890         n = sig->param_count + sig->hasthis;
891
892         cinfo = get_call_info (sig, FALSE);
893
894         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
895                 if (cinfo->ret.storage == ArgOnStack)
896                         stack_size += sizeof (gpointer);
897         }
898
899         for (i = 0; i < n; ++i) {
900                 if (is_virtual && i == 0) {
901                         /* the argument will be attached to the call instrucion */
902                         in = call->args [i];
903                         stack_size += 4;
904                 } else {
905                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
906                         in = call->args [i];
907                         arg->cil_code = in->cil_code;
908                         arg->inst_left = in;
909                         arg->type = in->type;
910                         /* prepend, so they get reversed */
911                         arg->next = call->out_args;
912                         call->out_args = arg;
913                         if (i >= sig->hasthis) {
914                                 MonoType *t = sig->params [i - sig->hasthis];
915                                 ptype = mono_type_get_underlying_type (t);
916                                 if (t->byref)
917                                         type = MONO_TYPE_U;
918                                 else
919                                         type = ptype->type;
920                                 /* FIXME: validate arguments... */
921                                 switch (type) {
922                                 case MONO_TYPE_I:
923                                 case MONO_TYPE_U:
924                                 case MONO_TYPE_BOOLEAN:
925                                 case MONO_TYPE_CHAR:
926                                 case MONO_TYPE_I1:
927                                 case MONO_TYPE_U1:
928                                 case MONO_TYPE_I2:
929                                 case MONO_TYPE_U2:
930                                 case MONO_TYPE_I4:
931                                 case MONO_TYPE_U4:
932                                 case MONO_TYPE_STRING:
933                                 case MONO_TYPE_CLASS:
934                                 case MONO_TYPE_OBJECT:
935                                 case MONO_TYPE_PTR:
936                                 case MONO_TYPE_FNPTR:
937                                 case MONO_TYPE_ARRAY:
938                                 case MONO_TYPE_SZARRAY:
939                                         stack_size += 4;
940                                         break;
941                                 case MONO_TYPE_I8:
942                                 case MONO_TYPE_U8:
943                                         stack_size += 8;
944                                         break;
945                                 case MONO_TYPE_R4:
946                                         stack_size += 4;
947                                         arg->opcode = OP_OUTARG_R4;
948                                         break;
949                                 case MONO_TYPE_R8:
950                                         stack_size += 8;
951                                         arg->opcode = OP_OUTARG_R8;
952                                         break;
953                                 case MONO_TYPE_VALUETYPE: {
954                                         int size;
955                                         if (sig->pinvoke) 
956                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
957                                         else 
958                                                 size = mono_type_stack_size (&in->klass->byval_arg, NULL);
959
960                                         stack_size += size;
961                                         arg->opcode = OP_OUTARG_VT;
962                                         arg->klass = in->klass;
963                                         arg->unused = sig->pinvoke;
964                                         arg->inst_imm = size; 
965                                         break;
966                                 }
967                                 case MONO_TYPE_TYPEDBYREF:
968                                         stack_size += sizeof (MonoTypedRef);
969                                         arg->opcode = OP_OUTARG_VT;
970                                         arg->klass = in->klass;
971                                         arg->unused = sig->pinvoke;
972                                         arg->inst_imm = sizeof (MonoTypedRef); 
973                                         break;
974                                 default:
975                                         g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
976                                 }
977                         } else {
978                                 /* the this argument */
979                                 stack_size += 4;
980                         }
981                 }
982         }
983
984         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
985                 if (cinfo->ret.storage == ArgValuetypeInReg) {
986                         MonoInst *zero_inst;
987                         /*
988                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
989                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
990                          * before calling the function. So we add a dummy instruction to represent pushing the 
991                          * struct return address to the stack. The return address will be saved to this stack slot 
992                          * by the code emitted in this_vret_args.
993                          */
994                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
995                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
996                         zero_inst->inst_p0 = 0;
997                         arg->inst_left = zero_inst;
998                         arg->type = STACK_PTR;
999                         /* prepend, so they get reversed */
1000                         arg->next = call->out_args;
1001                         call->out_args = arg;
1002                 }
1003                 else
1004                         /* if the function returns a struct, the called method already does a ret $0x4 */
1005                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1006                                 stack_size -= 4;
1007         }
1008
1009         call->stack_usage = stack_size;
1010         g_free (cinfo);
1011
1012         /* 
1013          * should set more info in call, such as the stack space
1014          * used by the args that needs to be added back to esp
1015          */
1016
1017         return call;
1018 }
1019
1020 /*
1021  * Allow tracing to work with this interface (with an optional argument)
1022  */
/*
 * mono_arch_instrument_prolog:
 *
 *   Emit a call to FUNC (the enter-method trace hook) at the start of the
 * method, passing cfg->method as the argument.  Returns the updated code
 * pointer.  In AOT mode the target address is loaded into a register and
 * called indirectly; otherwise patch entries are recorded so the method
 * constant and the absolute call target can be fixed up later.
 */
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
        guchar *code = p;

        /* if some args are passed in registers, we need to save them here */
        x86_push_reg (code, X86_EBP);

        if (cfg->compile_aot) {
                x86_push_imm (code, cfg->method);
                x86_mov_reg_imm (code, X86_EAX, func);
                x86_call_reg (code, X86_EAX);
        } else {
                mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
                x86_push_imm (code, cfg->method);
                mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
                x86_call_code (code, 0);
        }
        /* pop the two pushed words (saved EBP + method argument) */
        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);

        return code;
}
1045
/* Return-value save modes used by mono_arch_instrument_epilog (). */
enum {
        SAVE_NONE,      /* nothing to preserve around the trace call */
        SAVE_STRUCT,    /* valuetype returned through a hidden pointer argument */
        SAVE_EAX,       /* 32-bit integer/pointer result in %eax */
        SAVE_EAX_EDX,   /* 64-bit result in the %eax:%edx pair */
        SAVE_FP         /* floating point result on the x87 stack top */
};
1053
1054 void*
1055 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1056 {
1057         guchar *code = p;
1058         int arg_size = 0, save_mode = SAVE_NONE;
1059         MonoMethod *method = cfg->method;
1060         
1061         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1062         case MONO_TYPE_VOID:
1063                 /* special case string .ctor icall */
1064                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1065                         save_mode = SAVE_EAX;
1066                 else
1067                         save_mode = SAVE_NONE;
1068                 break;
1069         case MONO_TYPE_I8:
1070         case MONO_TYPE_U8:
1071                 save_mode = SAVE_EAX_EDX;
1072                 break;
1073         case MONO_TYPE_R4:
1074         case MONO_TYPE_R8:
1075                 save_mode = SAVE_FP;
1076                 break;
1077         case MONO_TYPE_VALUETYPE:
1078                 save_mode = SAVE_STRUCT;
1079                 break;
1080         default:
1081                 save_mode = SAVE_EAX;
1082                 break;
1083         }
1084
1085         switch (save_mode) {
1086         case SAVE_EAX_EDX:
1087                 x86_push_reg (code, X86_EDX);
1088                 x86_push_reg (code, X86_EAX);
1089                 if (enable_arguments) {
1090                         x86_push_reg (code, X86_EDX);
1091                         x86_push_reg (code, X86_EAX);
1092                         arg_size = 8;
1093                 }
1094                 break;
1095         case SAVE_EAX:
1096                 x86_push_reg (code, X86_EAX);
1097                 if (enable_arguments) {
1098                         x86_push_reg (code, X86_EAX);
1099                         arg_size = 4;
1100                 }
1101                 break;
1102         case SAVE_FP:
1103                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1104                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1105                 if (enable_arguments) {
1106                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1107                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1108                         arg_size = 8;
1109                 }
1110                 break;
1111         case SAVE_STRUCT:
1112                 if (enable_arguments) {
1113                         x86_push_membase (code, X86_EBP, 8);
1114                         arg_size = 4;
1115                 }
1116                 break;
1117         case SAVE_NONE:
1118         default:
1119                 break;
1120         }
1121
1122         if (cfg->compile_aot) {
1123                 x86_push_imm (code, method);
1124                 x86_mov_reg_imm (code, X86_EAX, func);
1125                 x86_call_reg (code, X86_EAX);
1126         } else {
1127                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1128                 x86_push_imm (code, method);
1129                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1130                 x86_call_code (code, 0);
1131         }
1132         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1133
1134         switch (save_mode) {
1135         case SAVE_EAX_EDX:
1136                 x86_pop_reg (code, X86_EAX);
1137                 x86_pop_reg (code, X86_EDX);
1138                 break;
1139         case SAVE_EAX:
1140                 x86_pop_reg (code, X86_EAX);
1141                 break;
1142         case SAVE_FP:
1143                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1144                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1145                 break;
1146         case SAVE_NONE:
1147         default:
1148                 break;
1149         }
1150
1151         return code;
1152 }
1153
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch for INS with condition COND (sign per SIGN).
 * If the target (a label or basic block) already has a native offset the
 * branch is emitted directly; otherwise a patch entry is recorded and a
 * placeholder branch is emitted — an 8-bit form when MONO_OPT_BRANCH is on
 * and the estimated displacement fits in an imm8, else the 32-bit form.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1178
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit a branch-to-exception if the condition fails: record an EXC patch
 * for EXC_NAME and emit a 32-bit conditional branch whose target is filled
 * in later.  Note: the stray semicolon after `while (0)` has been removed —
 * it defeated the do/while(0) idiom by producing an extra empty statement,
 * which breaks use inside an un-braced if/else.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
                x86_branch32 (code, cond, 0, signed);               \
        } while (0)
1186
/*
 * EMIT_FPCOMPARE:
 *
 *   Compare the top two x87 stack entries (popping both) and copy the FPU
 * status word into AX so the result can be tested with integer branches.
 * The stray semicolon after `while (0)` has been removed so the macro
 * expands to exactly one statement (safe in un-braced if/else).
 */
#define EMIT_FPCOMPARE(code) do { \
        x86_fcompp (code); \
        x86_fnstsw (code); \
} while (0)
1191
1192
1193 static guint8*
1194 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1195 {
1196         if (cfg->compile_aot) {
1197                 guint32 got_reg = X86_EAX;
1198
1199                 if (cfg->compile_aot) {          
1200                         /*
1201                          * Since the patches are generated by the back end, there is
1202                          * no way to generate a got_var at this point.
1203                          */
1204                         g_assert (cfg->got_var);
1205
1206                         if (cfg->got_var->opcode == OP_REGOFFSET)
1207                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1208                         else
1209                                 got_reg = cfg->got_var->dreg;
1210                 }
1211
1212                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1213                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1214         }
1215         else {
1216                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1217                 x86_call_code (code, 0);
1218         }
1219
1220         return code;
1221 }
1222
/*
 * True if INS neither reads nor depends on the condition flags, so a
 * flag-clobbering instruction (e.g. XOR) may be placed right before it.
 * FIXME: Add more instructions.
 */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1225
/*
 * peephole_pass:
 *
 *   Run simple pattern-based rewrites over the instruction list of BB:
 * constant-zero loads become XOR, redundant loads/moves after matching
 * stores are folded or deleted, compare-with-zero becomes TEST, etc.
 * Instructions are removed by unlinking them via last_ins->next; the
 * exact statement order is significant throughout.
 */
static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
        MonoInst *ins, *last_ins = NULL;
        ins = bb->code;

        while (ins) {

                switch (ins->opcode) {
                case OP_ICONST:
                        /* reg = 0 -> XOR (reg, reg) */
                        /* XOR sets cflags on x86, so we cant do it always */
                        if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
                                ins->opcode = CEE_XOR;
                                ins->sreg1 = ins->dreg;
                                ins->sreg2 = ins->dreg;
                        }
                        break;
                case OP_MUL_IMM: 
                        /* remove unnecessary multiplication with 1 */
                        /* NOTE(review): the removal path dereferences last_ins, so this
                         * assumes OP_MUL_IMM never appears as the first instruction of a
                         * basic block — confirm before relying on it. */
                        if (ins->inst_imm == 1) {
                                if (ins->dreg != ins->sreg1) {
                                        ins->opcode = OP_MOVE;
                                } else {
                                        last_ins->next = ins->next;
                                        ins = ins->next;
                                        continue;
                                }
                        }
                        break;
                case OP_COMPARE_IMM:
                        /* OP_COMPARE_IMM (reg, 0) 
                         * --> 
                         * OP_X86_TEST_NULL (reg) 
                         */
                        if (!ins->inst_imm)
                                ins->opcode = OP_X86_TEST_NULL;
                        break;
                case OP_X86_COMPARE_MEMBASE_IMM:
                        /* 
                         * OP_STORE_MEMBASE_REG reg, offset(basereg)
                         * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
                         * -->
                         * OP_STORE_MEMBASE_REG reg, offset(basereg)
                         * OP_COMPARE_IMM reg, imm
                         *
                         * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
                         */
                        if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                        ins->opcode = OP_COMPARE_IMM;
                                        ins->sreg1 = last_ins->sreg1;

                                        /* check if we can remove cmp reg,0 with test null */
                                        if (!ins->inst_imm)
                                                ins->opcode = OP_X86_TEST_NULL;
                                }

                        break;
                case OP_LOAD_MEMBASE:
                case OP_LOADI4_MEMBASE:
                        /* 
                         * Note: if reg1 = reg2 the load op is removed
                         *
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
                         * OP_LOAD_MEMBASE offset(basereg), reg2
                         * -->
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg)
                         * OP_MOVE reg1, reg2
                         */
                        if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
                                         || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                if (ins->dreg == last_ins->sreg1) {
                                        last_ins->next = ins->next;                             
                                        ins = ins->next;                                
                                        continue;
                                } else {
                                        //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
                                        ins->opcode = OP_MOVE;
                                        ins->sreg1 = last_ins->sreg1;
                                }

                        /* 
                         * Note: reg1 must be different from the basereg in the second load
                         * Note: if reg1 = reg2 is equal then second load is removed
                         *
                         * OP_LOAD_MEMBASE offset(basereg), reg1
                         * OP_LOAD_MEMBASE offset(basereg), reg2
                         * -->
                         * OP_LOAD_MEMBASE offset(basereg), reg1
                         * OP_MOVE reg1, reg2
                         */
                        /* NOTE(review): `} if (` below looks like a dropped `else`; the two
                         * guards are mutually exclusive (last_ins cannot be both a store and
                         * a load), so behavior is the same, but `} else if (` would be
                         * clearer — confirm before changing. */
                        } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
                                           || last_ins->opcode == OP_LOAD_MEMBASE) &&
                              ins->inst_basereg != last_ins->dreg &&
                              ins->inst_basereg == last_ins->inst_basereg &&
                              ins->inst_offset == last_ins->inst_offset) {

                                if (ins->dreg == last_ins->dreg) {
                                        last_ins->next = ins->next;                             
                                        ins = ins->next;                                
                                        continue;
                                } else {
                                        ins->opcode = OP_MOVE;
                                        ins->sreg1 = last_ins->dreg;
                                }

                                //g_assert_not_reached ();

#if 0
                        /* 
                         * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
                         * OP_LOAD_MEMBASE offset(basereg), reg
                         * -->
                         * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
                         * OP_ICONST reg, imm
                         */
                        } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
                                                || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
                                   ins->inst_basereg == last_ins->inst_destbasereg &&
                                   ins->inst_offset == last_ins->inst_offset) {
                                //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
                                ins->opcode = OP_ICONST;
                                ins->inst_c0 = last_ins->inst_imm;
                                g_assert_not_reached (); // check this rule
#endif
                        }
                        break;
                case OP_LOADU1_MEMBASE:
                case OP_LOADI1_MEMBASE:
                        /* 
                         * Note: if reg1 = reg2 the load op is removed
                         *
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
                         * OP_LOAD_MEMBASE offset(basereg), reg2
                         * -->
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg)
                         * OP_MOVE reg1, reg2
                         */
                        if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
                                if (ins->dreg == last_ins->sreg1) {
                                        last_ins->next = ins->next;                             
                                        ins = ins->next;                                
                                        continue;
                                } else {
                                        //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
                                        ins->opcode = OP_MOVE;
                                        ins->sreg1 = last_ins->sreg1;
                                }
                        }
                        break;
                case OP_LOADU2_MEMBASE:
                case OP_LOADI2_MEMBASE:
                        /* 
                         * Note: if reg1 = reg2 the load op is removed
                         *
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
                         * OP_LOAD_MEMBASE offset(basereg), reg2
                         * -->
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg)
                         * OP_MOVE reg1, reg2
                         */
                        if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
                                if (ins->dreg == last_ins->sreg1) {
                                        last_ins->next = ins->next;                             
                                        ins = ins->next;                                
                                        continue;
                                } else {
                                        //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
                                        ins->opcode = OP_MOVE;
                                        ins->sreg1 = last_ins->sreg1;
                                }
                        }
                        break;
                case CEE_CONV_I4:
                case CEE_CONV_U4:
                case OP_MOVE:
                        /*
                         * Removes:
                         *
                         * OP_MOVE reg, reg 
                         */
                        if (ins->dreg == ins->sreg1) {
                                if (last_ins)
                                        last_ins->next = ins->next;                             
                                ins = ins->next;
                                continue;
                        }
                        /* 
                         * Removes:
                         *
                         * OP_MOVE sreg, dreg 
                         * OP_MOVE dreg, sreg
                         */
                        if (last_ins && last_ins->opcode == OP_MOVE &&
                            ins->sreg1 == last_ins->dreg &&
                            ins->dreg == last_ins->sreg1) {
                                last_ins->next = ins->next;                             
                                ins = ins->next;                                
                                continue;
                        }
                        break;
                        
                case OP_X86_PUSH_MEMBASE:
                        /* push of a just-stored slot -> push the register directly */
                        if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
                                         last_ins->opcode == OP_STORE_MEMBASE_REG) &&
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                    ins->opcode = OP_X86_PUSH;
                                    ins->sreg1 = last_ins->sreg1;
                        }
                        break;
                }
                last_ins = ins;
                ins = ins->next;
        }
        bb->last_ins = last_ins;
}
1451
/*
 * Lookup table mapping branch-opcode indices to x86 condition codes.
 * NOTE(review): the two five-entry rows presumably correspond to the
 * signed and unsigned comparison variants, with the trailing entries for
 * overflow/carry checks — confirm against the opcode numbering in mini.h.
 */
static const int 
branch_cc_table [] = {
        X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
        X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
        X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Register-allocator tracing, active when verbose level > 1 (-v -v). */
#define DEBUG(a) if (cfg->verbose_level > 1) a
//#define DEBUG(a)
1461
1462 /*
1463  * returns the offset used by spillvar. It allocates a new
1464  * spill variable if necessary. 
1465  */
1466 static int
1467 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1468 {
1469         MonoSpillInfo **si, *info;
1470         int i = 0;
1471
1472         si = &cfg->spill_info; 
1473         
1474         while (i <= spillvar) {
1475
1476                 if (!*si) {
1477                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1478                         info->next = NULL;
1479                         cfg->stack_offset -= sizeof (gpointer);
1480                         info->offset = cfg->stack_offset;
1481                 }
1482
1483                 if (i == spillvar)
1484                         return (*si)->offset;
1485
1486                 i++;
1487                 si = &(*si)->next;
1488         }
1489
1490         g_assert_not_reached ();
1491         return 0;
1492 }
1493
1494 /*
1495  * returns the offset used by spillvar. It allocates a new
1496  * spill float variable if necessary. 
1497  * (same as mono_spillvar_offset but for float)
1498  */
1499 static int
1500 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1501 {
1502         MonoSpillInfo **si, *info;
1503         int i = 0;
1504
1505         si = &cfg->spill_info_float; 
1506         
1507         while (i <= spillvar) {
1508
1509                 if (!*si) {
1510                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1511                         info->next = NULL;
1512                         cfg->stack_offset -= sizeof (double);
1513                         info->offset = cfg->stack_offset;
1514                 }
1515
1516                 if (i == spillvar)
1517                         return (*si)->offset;
1518
1519                 i++;
1520                 si = &(*si)->next;
1521         }
1522
1523         g_assert_not_reached ();
1524         return 0;
1525 }
1526
/*
 * create_spilled_store_float:
 *
 *   Build (but do not link in) an OP_STORER8_MEMBASE_REG instruction that
 * stores float register REG into the EBP-relative slot of spill variable
 * SPILL.  The INS parameter is unused here.
 */
static MonoInst*
create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
{
        MonoInst *store;
        MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
        store->sreg1 = reg;
        store->inst_destbasereg = X86_EBP;
        store->inst_offset = mono_spillvar_offset_float (cfg, spill);

        DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08x(%%sp)) (from %d)\n", spill, store->inst_offset, reg));
        return store;
}
1542
1543 /*
1544  * Creates a load for spilled floating point items 
1545  */
1546 static MonoInst*
1547 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1548 {
1549         MonoInst *load;
1550         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1551         load->dreg = reg;
1552         load->inst_basereg = X86_EBP;
1553         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1554
1555         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08x(%%sp)) (from %d)\n", spill, load->inst_offset, reg));
1556         return load;
1557 }
1558
/* is_global_ireg: valid hard register that is NOT in the X86_IS_CALLEE scratch
 * set the local allocator hands out (i.e. it is allocated globally).
 * reg_is_freeable: valid hard register the local allocator may assign/free. */
#define is_global_ireg(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && !X86_IS_CALLEE ((r)))
#define reg_is_freeable(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && X86_IS_CALLEE ((r)))
1561
/*
 * Per-register liveness record built by the forward pass of
 * mono_arch_local_regalloc (); positions are 1-based instruction indices.
 */
typedef struct {
        int born_in;    /* position where the register is first defined (0 = never) */
        int killed_in;  /* position where it is (re)defined as a non-base destination */
        int last_use;   /* position of the most recent use */
        int prev_use;   /* position of the use before last_use */
        int flags;              /* used to track fp spill/load */
} RegTrack;
1569
/* Per-opcode machine description table (pentium_desc comes from cpu-pentium.h);
 * each entry is indexed with the MONO_INST_* slot constants. */
static const char*const * ins_spec = pentium_desc;
1571
1572 static void
1573 print_ins (int i, MonoInst *ins)
1574 {
1575         const char *spec = ins_spec [ins->opcode];
1576         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1577         if (spec [MONO_INST_DEST]) {
1578                 if (ins->dreg >= MONO_MAX_IREGS)
1579                         g_print (" R%d <-", ins->dreg);
1580                 else
1581                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1582         }
1583         if (spec [MONO_INST_SRC1]) {
1584                 if (ins->sreg1 >= MONO_MAX_IREGS)
1585                         g_print (" R%d", ins->sreg1);
1586                 else
1587                         g_print (" %s", mono_arch_regname (ins->sreg1));
1588         }
1589         if (spec [MONO_INST_SRC2]) {
1590                 if (ins->sreg2 >= MONO_MAX_IREGS)
1591                         g_print (" R%d", ins->sreg2);
1592                 else
1593                         g_print (" %s", mono_arch_regname (ins->sreg2));
1594         }
1595         if (spec [MONO_INST_CLOB])
1596                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1597         g_print ("\n");
1598 }
1599
1600 static void
1601 print_regtrack (RegTrack *t, int num)
1602 {
1603         int i;
1604         char buf [32];
1605         const char *r;
1606         
1607         for (i = 0; i < num; ++i) {
1608                 if (!t [i].born_in)
1609                         continue;
1610                 if (i >= MONO_MAX_IREGS) {
1611                         g_snprintf (buf, sizeof(buf), "R%d", i);
1612                         r = buf;
1613                 } else
1614                         r = mono_arch_regname (i);
1615                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1616         }
1617 }
1618
typedef struct InstList InstList;

/* Doubly-linked list node; the allocator prepends nodes while walking a
 * basic block forward, so the list visits instructions in reverse order. */
struct InstList {
        InstList *prev;
        InstList *next;
        MonoInst *data;
};
1626
1627 static inline InstList*
1628 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1629 {
1630         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1631         item->data = data;
1632         item->prev = NULL;
1633         item->next = list;
1634         if (list)
1635                 list->prev = item;
1636         return item;
1637 }
1638
1639 /*
1640  * Force the spilling of the variable in the symbolic register 'reg'.
1641  */
1642 static int
1643 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1644 {
1645         MonoInst *load;
1646         int i, sel, spill;
1647         
1648         sel = cfg->rs->iassign [reg];
1649         /*i = cfg->rs->isymbolic [sel];
1650         g_assert (i == reg);*/
1651         i = reg;
1652         spill = ++cfg->spill_count;
1653         cfg->rs->iassign [i] = -spill - 1;
1654         mono_regstate_free_int (cfg->rs, sel);
1655         /* we need to create a spill var and insert a load to sel after the current instruction */
1656         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1657         load->dreg = sel;
1658         load->inst_basereg = X86_EBP;
1659         load->inst_offset = mono_spillvar_offset (cfg, spill);
1660         if (item->prev) {
1661                 while (ins->next != item->prev->data)
1662                         ins = ins->next;
1663         }
1664         load->next = ins->next;
1665         ins->next = load;
1666         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1667         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1668         g_assert (i == sel);
1669
1670         return sel;
1671 }
1672
/*
 * Free a hard register from 'regmask' so that symbolic register 'reg' can
 * be assigned one.  Registers used by the operands of 'ins' itself are
 * excluded first; the victim's symbolic register is marked as spilled and
 * an OP_LOAD_MEMBASE reloading it is inserted after the current
 * instruction.  Returns the freed (and re-reserved) hard register.
 */
static int
get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
{
        MonoInst *load;
        int i, sel, spill;

        DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
        /* exclude the registers in the current instruction */
        if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
                if (ins->sreg1 >= MONO_MAX_IREGS)
                        regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
                else
                        regmask &= ~ (1 << ins->sreg1);
                DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
        }
        if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
                if (ins->sreg2 >= MONO_MAX_IREGS)
                        regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
                else
                        regmask &= ~ (1 << ins->sreg2);
                DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
        }
        if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
                regmask &= ~ (1 << ins->dreg);
                DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
        }

        DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
        g_assert (regmask); /* need at least a register we can free */
        sel = -1;
        /* we should track prev_use and spill the register that's farther */
        for (i = 0; i < MONO_MAX_IREGS; ++i) {
                if (regmask & (1 << i)) {
                        sel = i;
                        DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
                        break;
                }
        }
        /* symbolic register currently living in the victim hard register */
        i = cfg->rs->isymbolic [sel];
        spill = ++cfg->spill_count;
        cfg->rs->iassign [i] = -spill - 1;
        mono_regstate_free_int (cfg->rs, sel);
        /* we need to create a spill var and insert a load to sel after the current instruction */
        MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
        load->dreg = sel;
        load->inst_basereg = X86_EBP;
        load->inst_offset = mono_spillvar_offset (cfg, spill);
        if (item->prev) {
                /* skip instructions already inserted after 'ins' so the reload
                 * lands just before the next original instruction (the list
                 * is reversed, so item->prev->data is the following ins) */
                while (ins->next != item->prev->data)
                        ins = ins->next;
        }
        load->next = ins->next;
        ins->next = load;
        DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
        /* re-reserve the freed register for the caller */
        i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
        g_assert (i == sel);
        
        return sel;
}
1732
1733 static MonoInst*
1734 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1735 {
1736         MonoInst *copy;
1737         MONO_INST_NEW (cfg, copy, OP_MOVE);
1738         copy->dreg = dest;
1739         copy->sreg1 = src;
1740         if (ins) {
1741                 copy->next = ins->next;
1742                 ins->next = copy;
1743         }
1744         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1745         return copy;
1746 }
1747
1748 static MonoInst*
1749 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1750 {
1751         MonoInst *store;
1752         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1753         store->sreg1 = reg;
1754         store->inst_destbasereg = X86_EBP;
1755         store->inst_offset = mono_spillvar_offset (cfg, spill);
1756         if (ins) {
1757                 store->next = ins->next;
1758                 ins->next = store;
1759         }
1760         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1761         return store;
1762 }
1763
/*
 * Insert 'to_insert' immediately before 'ins' in the instruction list.
 * 'item' is the InstList node for 'ins'; since the list is reversed,
 * item->next->data is the instruction preceding 'ins' in program order.
 */
static void
insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
{
        MonoInst *prev;
        if (item->next) {
                prev = item->next->data;

                /* walk forward to the instruction that currently links to 'ins' */
                while (prev->next != ins)
                        prev = prev->next;
                to_insert->next = ins;
                prev->next = to_insert;
        } else {
                /* 'ins' has no predecessor in the list: nothing is re-linked
                 * to point at 'to_insert' here.  NOTE(review): presumably the
                 * item->data fixup below is what keeps it reachable — confirm. */
                to_insert->next = ins;
        }
        /* 
         * needed otherwise in the next instruction we can add an ins to the 
         * end and that would get past this instruction.
         */
        item->data = to_insert; 
}
1784
1785
#if  0
/* Disabled code, not compiled — kept for reference only.  A hint-aware
 * variant, mono_x86_alloc_int_reg (), is used by the allocator instead. */
static int
alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
{
        int val = cfg->rs->iassign [sym_reg];
        if (val < 0) {
                int spill = 0;
                if (val < -1) {
                        /* the register gets spilled after this inst */
                        spill = -val -1;
                }
                val = mono_regstate_alloc_int (cfg->rs, allow_mask);
                if (val < 0)
                        val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
                cfg->rs->iassign [sym_reg] = val;
                /* add option to store before the instruction for src registers */
                if (spill)
                        create_spilled_store (cfg, spill, val, sym_reg, ins);
        }
        cfg->rs->isymbolic [val] = sym_reg;
        return val;
}
#endif
1809
/* flags used in reginfo->flags */
enum {
        /* fp source must be reloaded from its slot and then spilled again */
        MONO_X86_FP_NEEDS_LOAD_SPILL    = 1 << 0,
        /* fp destination must be spilled (fp stack already full) */
        MONO_X86_FP_NEEDS_SPILL                 = 1 << 1,
        /* fp source must be reloaded from a spill slot */
        MONO_X86_FP_NEEDS_LOAD                  = 1 << 2,
        /* integer allocation hints consumed by mono_x86_alloc_int_reg () */
        MONO_X86_REG_NOT_ECX                    = 1 << 3,
        MONO_X86_REG_EAX                                = 1 << 4,
        MONO_X86_REG_EDX                                = 1 << 5,
        MONO_X86_REG_ECX                                = 1 << 6
};
1820
1821 static int
1822 mono_x86_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1823 {
1824         int val;
1825         int test_mask = dest_mask;
1826
1827         if (flags & MONO_X86_REG_EAX)
1828                 test_mask &= (1 << X86_EAX);
1829         else if (flags & MONO_X86_REG_EDX)
1830                 test_mask &= (1 << X86_EDX);
1831         else if (flags & MONO_X86_REG_ECX)
1832                 test_mask &= (1 << X86_ECX);
1833         else if (flags & MONO_X86_REG_NOT_ECX)
1834                 test_mask &= ~ (1 << X86_ECX);
1835
1836         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1837         if (val >= 0 && test_mask != dest_mask)
1838                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1839
1840         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1841                 DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but exluding ECX\n", test_mask));
1842                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & (~1 << X86_ECX)));
1843         }
1844
1845         if (val < 0) {
1846                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1847                 if (val < 0)
1848                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1849         }
1850
1851         return val;
1852 }
1853
1854 static inline void
1855 assign_ireg (MonoRegState *rs, int reg, int hreg)
1856 {
1857         g_assert (reg >= MONO_MAX_IREGS);
1858         g_assert (hreg < MONO_MAX_IREGS);
1859         g_assert (! is_global_ireg (hreg));
1860
1861         rs->iassign [reg] = hreg;
1862         rs->isymbolic [hreg] = reg;
1863         rs->ifree_mask &= ~ (1 << hreg);
1864 }
1865
1866 /*#include "cprop.c"*/
1867
1868 /*
1869  * Local register allocation.
1870  * We first scan the list of instructions and we save the liveness info of
1871  * each register (when the register is first used, when it's value is set etc.).
1872  * We also reverse the list of instructions (in the InstList list) because assigning
1873  * registers backwards allows for more tricks to be used.
1874  */
1875 void
1876 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1877 {
1878         MonoInst *ins;
1879         MonoRegState *rs = cfg->rs;
1880         int i, val, fpcount;
1881         RegTrack *reginfo, *reginfof;
1882         RegTrack *reginfo1, *reginfo2, *reginfod;
1883         InstList *tmp, *reversed = NULL;
1884         const char *spec;
1885         guint32 src1_mask, src2_mask, dest_mask;
1886         GList *fspill_list = NULL;
1887         int fspill = 0;
1888
1889         if (!bb->code)
1890                 return;
1891         rs->next_vireg = bb->max_ireg;
1892         rs->next_vfreg = bb->max_freg;
1893         mono_regstate_assign (rs);
1894         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1895         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1896         rs->ifree_mask = X86_CALLEE_REGS;
1897
1898         ins = bb->code;
1899
1900         /*if (cfg->opt & MONO_OPT_COPYPROP)
1901                 local_copy_prop (cfg, ins);*/
1902
1903         i = 1;
1904         fpcount = 0;
1905         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1906         /* forward pass on the instructions to collect register liveness info */
1907         while (ins) {
1908                 spec = ins_spec [ins->opcode];
1909                 
1910                 DEBUG (print_ins (i, ins));
1911
1912                 if (spec [MONO_INST_SRC1]) {
1913                         if (spec [MONO_INST_SRC1] == 'f') {
1914                                 GList *spill;
1915                                 reginfo1 = reginfof;
1916
1917                                 spill = g_list_first (fspill_list);
1918                                 if (spill && fpcount < MONO_MAX_FREGS) {
1919                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1920                                         fspill_list = g_list_remove (fspill_list, spill->data);
1921                                 } else
1922                                         fpcount--;
1923                         }
1924                         else
1925                                 reginfo1 = reginfo;
1926                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1927                         reginfo1 [ins->sreg1].last_use = i;
1928                         if (spec [MONO_INST_SRC1] == 'L') {
1929                                 /* The virtual register is allocated sequentially */
1930                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1931                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1932                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1933                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1934
1935                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1936                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1937                         }
1938                 } else {
1939                         ins->sreg1 = -1;
1940                 }
1941                 if (spec [MONO_INST_SRC2]) {
1942                         if (spec [MONO_INST_SRC2] == 'f') {
1943                                 GList *spill;
1944                                 reginfo2 = reginfof;
1945                                 spill = g_list_first (fspill_list);
1946                                 if (spill) {
1947                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1948                                         fspill_list = g_list_remove (fspill_list, spill->data);
1949                                         if (fpcount >= MONO_MAX_FREGS) {
1950                                                 fspill++;
1951                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1952                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1953                                         }
1954                                 } else
1955                                         fpcount--;
1956                         }
1957                         else
1958                                 reginfo2 = reginfo;
1959                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1960                         reginfo2 [ins->sreg2].last_use = i;
1961                         if (spec [MONO_INST_SRC2] == 'L') {
1962                                 /* The virtual register is allocated sequentially */
1963                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1964                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1965                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1966                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1967                         }
1968                         if (spec [MONO_INST_CLOB] == 's') {
1969                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1970                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1971                         }
1972                 } else {
1973                         ins->sreg2 = -1;
1974                 }
1975                 if (spec [MONO_INST_DEST]) {
1976                         if (spec [MONO_INST_DEST] == 'f') {
1977                                 reginfod = reginfof;
1978                                 if (fpcount >= MONO_MAX_FREGS) {
1979                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1980                                         fspill++;
1981                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1982                                         fpcount--;
1983                                 }
1984                                 fpcount++;
1985                         }
1986                         else
1987                                 reginfod = reginfo;
1988                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1989                                 reginfod [ins->dreg].killed_in = i;
1990                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1991                         reginfod [ins->dreg].last_use = i;
1992                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1993                                 reginfod [ins->dreg].born_in = i;
1994                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
1995                                 /* The virtual register is allocated sequentially */
1996                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1997                                 reginfod [ins->dreg + 1].last_use = i;
1998                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1999                                         reginfod [ins->dreg + 1].born_in = i;
2000
2001                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
2002                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
2003                         }
2004                 } else {
2005                         ins->dreg = -1;
2006                 }
2007
2008                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
2009                 ++i;
2010                 ins = ins->next;
2011         }
2012
2013         // todo: check if we have anything left on fp stack, in verify mode?
2014         fspill = 0;
2015
2016         DEBUG (print_regtrack (reginfo, rs->next_vireg));
2017         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
2018         tmp = reversed;
2019         while (tmp) {
2020                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
2021                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
2022                 --i;
2023                 ins = tmp->data;
2024                 spec = ins_spec [ins->opcode];
2025                 prev_dreg = -1;
2026                 clob_dreg = -1;
2027                 DEBUG (g_print ("processing:"));
2028                 DEBUG (print_ins (i, ins));
2029                 if (spec [MONO_INST_CLOB] == 's') {
2030                         /*
2031                          * Shift opcodes, SREG2 must be RCX
2032                          */
2033                         if (rs->ifree_mask & (1 << X86_ECX)) {
2034                                 if (ins->sreg2 < MONO_MAX_IREGS) {
2035                                         /* Argument already in hard reg, need to copy */
2036                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2037                                         insert_before_ins (ins, tmp, copy);
2038                                 }
2039                                 else {
2040                                         DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
2041                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2042                                 }
2043                         } else {
2044                                 int need_ecx_spill = TRUE;
2045                                 /* 
2046                                  * we first check if src1/dreg is already assigned a register
2047                                  * and then we force a spill of the var assigned to ECX.
2048                                  */
2049                                 /* the destination register can't be ECX */
2050                                 dest_mask &= ~ (1 << X86_ECX);
2051                                 src1_mask &= ~ (1 << X86_ECX);
2052                                 val = rs->iassign [ins->dreg];
2053                                 /* 
2054                                  * the destination register is already assigned to ECX:
2055                                  * we need to allocate another register for it and then
2056                                  * copy from this to ECX.
2057                                  */
2058                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
2059                                         int new_dest;
2060                                         new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2061                                         g_assert (new_dest >= 0);
2062                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
2063
2064                                         rs->isymbolic [new_dest] = ins->dreg;
2065                                         rs->iassign [ins->dreg] = new_dest;
2066                                         clob_dreg = ins->dreg;
2067                                         ins->dreg = new_dest;
2068                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
2069                                         need_ecx_spill = FALSE;
2070                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
2071                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
2072                                         rs->iassign [ins->dreg] = val;
2073                                         rs->isymbolic [val] = prev_dreg;
2074                                         ins->dreg = val;*/
2075                                 }
2076                                 if (is_global_ireg (ins->sreg2)) {
2077                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2078                                         insert_before_ins (ins, tmp, copy);
2079                                 }
2080                                 else {
2081                                         val = rs->iassign [ins->sreg2];
2082                                         if (val >= 0 && val != X86_ECX) {
2083                                                 MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
2084                                                 DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
2085                                                 move->next = ins;
2086                                                 g_assert_not_reached ();
2087                                                 /* FIXME: where is move connected to the instruction list? */
2088                                                 //tmp->prev->data->next = move;
2089                                         }
2090                                         else {
2091                                                 if (val == X86_ECX)
2092                                                 need_ecx_spill = FALSE;
2093                                         }
2094                                 }
2095                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
2096                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
2097                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
2098                                         mono_regstate_free_int (rs, X86_ECX);
2099                                 }
2100                                 if (!is_global_ireg (ins->sreg2))
2101                                         /* force-set sreg2 */
2102                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2103                         }
2104                         ins->sreg2 = X86_ECX;
2105                 } else if (spec [MONO_INST_CLOB] == 'd') {
2106                         /*
2107                          * DIVISION/REMAINER
2108                          */
2109                         int dest_reg = X86_EAX;
2110                         int clob_reg = X86_EDX;
2111                         if (spec [MONO_INST_DEST] == 'd') {
2112                                 dest_reg = X86_EDX; /* reminder */
2113                                 clob_reg = X86_EAX;
2114                         }
2115                         if (is_global_ireg (ins->dreg))
2116                                 val = ins->dreg;
2117                         else
2118                                 val = rs->iassign [ins->dreg];
2119                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
2120                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2121                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2122                                 mono_regstate_free_int (rs, dest_reg);
2123                         }
2124                         if (val < 0) {
2125                                 if (val < -1) {
2126                                         /* the register gets spilled after this inst */
2127                                         int spill = -val -1;
2128                                         dest_mask = 1 << dest_reg;
2129                                         prev_dreg = ins->dreg;
2130                                         val = mono_regstate_alloc_int (rs, dest_mask);
2131                                         if (val < 0)
2132                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
2133                                         rs->iassign [ins->dreg] = val;
2134                                         if (spill)
2135                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
2136                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2137                                         rs->isymbolic [val] = prev_dreg;
2138                                         ins->dreg = val;
2139                                 } else {
2140                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
2141                                         prev_dreg = ins->dreg;
2142                                         assign_ireg (rs, ins->dreg, dest_reg);
2143                                         ins->dreg = dest_reg;
2144                                         val = dest_reg;
2145                                 }
2146                         }
2147
2148                         //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
2149                         if (val != dest_reg) { /* force a copy */
2150                                 create_copy_ins (cfg, val, dest_reg, ins);
2151                                 if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
2152                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2153                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2154                                         mono_regstate_free_int (rs, dest_reg);
2155                                 }
2156                         }
2157                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
2158                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2159                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2160                                 mono_regstate_free_int (rs, clob_reg);
2161                         }
2162                         src1_mask = 1 << X86_EAX;
2163                         src2_mask = 1 << X86_ECX;
2164                 } else if (spec [MONO_INST_DEST] == 'l') {
2165                         int hreg;
2166                         val = rs->iassign [ins->dreg];
2167                         /* check special case when dreg have been moved from ecx (clob shift) */
2168                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2169                                 hreg = clob_dreg + 1;
2170                         else
2171                                 hreg = ins->dreg + 1;
2172
2173                         /* base prev_dreg on fixed hreg, handle clob case */
2174                         val = hreg - 1;
2175
2176                         if (val != rs->isymbolic [X86_EAX] && !(rs->ifree_mask & (1 << X86_EAX))) {
2177                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [X86_EAX]));
2178                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2179                                 mono_regstate_free_int (rs, X86_EAX);
2180                         }
2181                         if (hreg != rs->isymbolic [X86_EDX] && !(rs->ifree_mask & (1 << X86_EDX))) {
2182                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [X86_EDX]));
2183                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
2184                                 mono_regstate_free_int (rs, X86_EDX);
2185                         }
2186                 } else if (spec [MONO_INST_CLOB] == 'b') {
2187                         /*
2188                          * x86_set_reg instructions, dreg needs to be EAX..EDX
2189                          */     
2190                         dest_mask = (1 << X86_EAX) | (1 << X86_EBX) | (1 << X86_ECX) | (1 << X86_EDX);
2191                         if ((ins->dreg < MONO_MAX_IREGS) && (! (dest_mask & (1 << ins->dreg)))) {
2192                                 /* 
2193                                  * ins->dreg is already a hard reg, need to allocate another
2194                                  * suitable hard reg and make a copy.
2195                                  */
2196                                 int new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2197                                 g_assert (new_dest >= 0);
2198
2199                                 create_copy_ins (cfg, ins->dreg, new_dest, ins);
2200                                 DEBUG (g_print ("\tclob:b changing dreg R%d to %s\n", ins->dreg, mono_arch_regname (new_dest)));
2201                                 ins->dreg = new_dest;
2202
2203                                 /* The hard reg is no longer needed */
2204                                 mono_regstate_free_int (rs, new_dest);
2205                         }
2206                 }
2207
2208                 /*
2209                  * TRACK DREG
2210                  */
2211                 if (spec [MONO_INST_DEST] == 'f') {
2212                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
2213                                 GList *spill_node;
2214                                 MonoInst *store;
2215                                 spill_node = g_list_first (fspill_list);
2216                                 g_assert (spill_node);
2217
2218                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
2219                                 insert_before_ins (ins, tmp, store);
2220                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
2221                                 fspill--;
2222                         }
2223                 } else if (spec [MONO_INST_DEST] == 'L') {
2224                         int hreg;
2225                         val = rs->iassign [ins->dreg];
2226                         /* check special case when dreg have been moved from ecx (clob shift) */
2227                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2228                                 hreg = clob_dreg + 1;
2229                         else
2230                                 hreg = ins->dreg + 1;
2231
2232                         /* base prev_dreg on fixed hreg, handle clob case */
2233                         prev_dreg = hreg - 1;
2234
2235                         if (val < 0) {
2236                                 int spill = 0;
2237                                 if (val < -1) {
2238                                         /* the register gets spilled after this inst */
2239                                         spill = -val -1;
2240                                 }
2241                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2242                                 rs->iassign [ins->dreg] = val;
2243                                 if (spill)
2244                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2245                         }
2246
2247                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
2248  
2249                         rs->isymbolic [val] = hreg - 1;
2250                         ins->dreg = val;
2251                         
2252                         val = rs->iassign [hreg];
2253                         if (val < 0) {
2254                                 int spill = 0;
2255                                 if (val < -1) {
2256                                         /* the register gets spilled after this inst */
2257                                         spill = -val -1;
2258                                 }
2259                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2260                                 rs->iassign [hreg] = val;
2261                                 if (spill)
2262                                         create_spilled_store (cfg, spill, val, hreg, ins);
2263                         }
2264
2265                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
2266                         rs->isymbolic [val] = hreg;
2267                         /* save reg allocating into unused */
2268                         ins->unused = val;
2269
2270                         /* check if we can free our long reg */
2271                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2272                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
2273                                 mono_regstate_free_int (rs, val);
2274                         }
2275                 }
2276                 else if (ins->dreg >= MONO_MAX_IREGS) {
2277                         int hreg;
2278                         val = rs->iassign [ins->dreg];
2279                         if (spec [MONO_INST_DEST] == 'l') {
2280                                 /* check special case when dreg have been moved from ecx (clob shift) */
2281                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2282                                         hreg = clob_dreg + 1;
2283                                 else
2284                                         hreg = ins->dreg + 1;
2285
2286                                 /* base prev_dreg on fixed hreg, handle clob case */
2287                                 prev_dreg = hreg - 1;
2288                         } else
2289                                 prev_dreg = ins->dreg;
2290
2291                         if (val < 0) {
2292                                 int spill = 0;
2293                                 if (val < -1) {
2294                                         /* the register gets spilled after this inst */
2295                                         spill = -val -1;
2296                                 }
2297                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2298                                 rs->iassign [ins->dreg] = val;
2299                                 if (spill)
2300                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2301                         }
2302                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2303                         rs->isymbolic [val] = prev_dreg;
2304                         ins->dreg = val;
2305                         /* handle cases where lreg needs to be eax:edx */
2306                         if (spec [MONO_INST_DEST] == 'l') {
2307                                 /* check special case when dreg have been moved from ecx (clob shift) */
2308                                 int hreg = prev_dreg + 1;
2309                                 val = rs->iassign [hreg];
2310                                 if (val < 0) {
2311                                         int spill = 0;
2312                                         if (val < -1) {
2313                                                 /* the register gets spilled after this inst */
2314                                                 spill = -val -1;
2315                                         }
2316                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2317                                         rs->iassign [hreg] = val;
2318                                         if (spill)
2319                                                 create_spilled_store (cfg, spill, val, hreg, ins);
2320                                 }
2321                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
2322                                 rs->isymbolic [val] = hreg;
2323                                 if (ins->dreg == X86_EAX) {
2324                                         if (val != X86_EDX)
2325                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2326                                 } else if (ins->dreg == X86_EDX) {
2327                                         if (val == X86_EAX) {
2328                                                 /* swap */
2329                                                 g_assert_not_reached ();
2330                                         } else {
2331                                                 /* two forced copies */
2332                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2333                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2334                                         }
2335                                 } else {
2336                                         if (val == X86_EDX) {
2337                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2338                                         } else {
2339                                                 /* two forced copies */
2340                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2341                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2342                                         }
2343                                 }
2344                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2345                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
2346                                         mono_regstate_free_int (rs, val);
2347                                 }
2348                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
2349                                 /* this instruction only outputs to EAX, need to copy */
2350                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2351                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
2352                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
2353                         }
2354                 }
2355                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
2356                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
2357                         mono_regstate_free_int (rs, ins->dreg);
2358                 }
2359                 /* put src1 in EAX if it needs to be */
2360                 if (spec [MONO_INST_SRC1] == 'a') {
2361                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
2362                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
2363                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2364                                 mono_regstate_free_int (rs, X86_EAX);
2365                         }
2366                         if (ins->sreg1 < MONO_MAX_IREGS) {
2367                                 /* The argument is already in a hard reg, need to copy */
2368                                 MonoInst *copy = create_copy_ins (cfg, X86_EAX, ins->sreg1, NULL);
2369                                 insert_before_ins (ins, tmp, copy);
2370                         }
2371                         else
2372                                 /* force-set sreg1 */
2373                                 assign_ireg (rs, ins->sreg1, X86_EAX);
2374                         ins->sreg1 = X86_EAX;
2375                 }
2376
2377                 /*
2378                  * TRACK SREG1
2379                  */
2380                 if (spec [MONO_INST_SRC1] == 'f') {
2381                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
2382                                 MonoInst *load;
2383                                 MonoInst *store = NULL;
2384
2385                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2386                                         GList *spill_node;
2387                                         spill_node = g_list_first (fspill_list);
2388                                         g_assert (spill_node);
2389
2390                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
2391                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2392                                 }
2393
2394                                 fspill++;
2395                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2396                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
2397                                 insert_before_ins (ins, tmp, load);
2398                                 if (store) 
2399                                         insert_before_ins (load, tmp, store);
2400                         }
2401                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
2402                         /* force source to be same as dest */
2403                         assign_ireg (rs, ins->sreg1, ins->dreg);
2404                         assign_ireg (rs, ins->sreg1 + 1, ins->unused);
2405
2406                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
2407                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
2408
2409                         ins->sreg1 = ins->dreg;
2410                         /* 
2411                          * No need for saving the reg, we know that src1=dest in this cases
2412                          * ins->inst_c0 = ins->unused;
2413                          */
2414                 }
2415                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
2416                         val = rs->iassign [ins->sreg1];
2417                         prev_sreg1 = ins->sreg1;
2418                         if (val < 0) {
2419                                 int spill = 0;
2420                                 if (val < -1) {
2421                                         /* the register gets spilled after this inst */
2422                                         spill = -val -1;
2423                                 }
2424                                 if (0 && ins->opcode == OP_MOVE) {
2425                                         /* 
2426                                          * small optimization: the dest register is already allocated
2427                                          * but the src one is not: we can simply assign the same register
2428                                          * here and peephole will get rid of the instruction later.
2429                                          * This optimization may interfere with the clobbering handling:
2430                                          * it removes a mov operation that will be added again to handle clobbering.
2431                                          * There are also some other issues that should with make testjit.
2432                                          */
2433                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2434                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2435                                         //g_assert (val >= 0);
2436                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2437                                 } else {
2438                                         //g_assert (val == -1); /* source cannot be spilled */
2439                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2440                                         rs->iassign [ins->sreg1] = val;
2441                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2442                                 }
2443                                 if (spill) {
2444                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2445                                         insert_before_ins (ins, tmp, store);
2446                                 }
2447                         }
2448                         rs->isymbolic [val] = prev_sreg1;
2449                         ins->sreg1 = val;
2450                 } else {
2451                         prev_sreg1 = -1;
2452                 }
2453                 /* handle clobbering of sreg1 */
2454                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2455                         MonoInst *sreg2_copy = NULL;
2456                         MonoInst *copy = NULL;
2457
2458                         if (ins->dreg == ins->sreg2) {
2459                                 /* 
2460                                  * copying sreg1 to dreg could clobber sreg2, so allocate a new
2461                                  * register for it.
2462                                  */
2463                                 int reg2 = 0;
2464
2465                                 reg2 = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->sreg2, 0);
2466
2467                                 DEBUG (g_print ("\tneed to copy sreg2 %s to reg %s\n", mono_arch_regname (ins->sreg2), mono_arch_regname (reg2)));
2468                                 sreg2_copy = create_copy_ins (cfg, reg2, ins->sreg2, NULL);
2469                                 prev_sreg2 = ins->sreg2 = reg2;
2470
2471                                 mono_regstate_free_int (rs, reg2);
2472                         }
2473
2474                         copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2475                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2476                         insert_before_ins (ins, tmp, copy);
2477
2478                         if (sreg2_copy)
2479                                 insert_before_ins (copy, tmp, sreg2_copy);
2480
2481                         /*
2482                          * Need to prevent sreg2 to be allocated to sreg1, since that
2483                          * would screw up the previous copy.
2484                          */
2485                         src2_mask &= ~ (1 << ins->sreg1);
2486                         /* we set sreg1 to dest as well */
2487                         prev_sreg1 = ins->sreg1 = ins->dreg;
2488                         src2_mask &= ~ (1 << ins->dreg);
2489                 }
2490
2491                 /*
2492                  * TRACK SREG2
2493                  */
2494                 if (spec [MONO_INST_SRC2] == 'f') {
2495                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2496                                 MonoInst *load;
2497                                 MonoInst *store = NULL;
2498
2499                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2500                                         GList *spill_node;
2501
2502                                         spill_node = g_list_first (fspill_list);
2503                                         g_assert (spill_node);
2504                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2505                                                 spill_node = g_list_next (spill_node);
2506         
2507                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2508                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2509                                 } 
2510                                 
2511                                 fspill++;
2512                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2513                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2514                                 insert_before_ins (ins, tmp, load);
2515                                 if (store) 
2516                                         insert_before_ins (load, tmp, store);
2517                         }
2518                 } 
2519                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2520                         val = rs->iassign [ins->sreg2];
2521                         prev_sreg2 = ins->sreg2;
2522                         if (val < 0) {
2523                                 int spill = 0;
2524                                 if (val < -1) {
2525                                         /* the register gets spilled after this inst */
2526                                         spill = -val -1;
2527                                 }
2528                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2529                                 rs->iassign [ins->sreg2] = val;
2530                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2531                                 if (spill)
2532                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2533                         }
2534                         rs->isymbolic [val] = prev_sreg2;
2535                         ins->sreg2 = val;
2536                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
2537                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
2538                         }
2539                 } else {
2540                         prev_sreg2 = -1;
2541                 }
2542
2543                 if (spec [MONO_INST_CLOB] == 'c') {
2544                         int j, s;
2545                         guint32 clob_mask = X86_CALLEE_REGS;
2546                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2547                                 s = 1 << j;
2548                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2549                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2550                                 }
2551                         }
2552                 }
2553                 if (spec [MONO_INST_CLOB] == 'a') {
2554                         guint32 clob_reg = X86_EAX;
2555                         if (!(rs->ifree_mask & (1 << clob_reg)) && (rs->isymbolic [clob_reg] >= 8)) {
2556                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2557                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2558                                 mono_regstate_free_int (rs, clob_reg);
2559                         }
2560                 }
2561                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2562                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2563                         mono_regstate_free_int (rs, ins->sreg1);
2564                 }
2565                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2566                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2567                         mono_regstate_free_int (rs, ins->sreg2);
2568                 }*/
2569         
2570                 //DEBUG (print_ins (i, ins));
2571                 /* this may result from a insert_before call */
2572                 if (!tmp->next)
2573                         bb->code = tmp->data;
2574                 tmp = tmp->next;
2575         }
2576
2577         g_free (reginfo);
2578         g_free (reginfof);
2579         g_list_free (fspill_list);
2580 }
2581
/*
 * emit_float_to_int:
 *
 *   Emit code which converts the floating point value on top of the x87
 * register stack into an integer of SIZE bytes, leaving the result in DREG.
 * Returns the updated native code pointer.
 *
 * FISTP rounds according to the current FPU control word, while CIL/C
 * conversion semantics require truncation toward zero, so the control word
 * is saved on the stack, its rounding-control bits are forced to
 * 'truncate', the conversion is performed, and the original control word
 * is restored.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
        /* Save the current FPU control word in a 4 byte stack slot */
        x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
        x86_fnstcw_membase(code, X86_ESP, 0);
        /* Build a copy with the rounding-control bits (10-11) set to 11b = truncate */
        x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
        x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
        x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
        /* Activate the truncating control word (stored at offset 2) */
        x86_fldcw_membase (code, X86_ESP, 2);
        if (size == 8) {
                /* 64 bit result: store the full quadword, then pop the low dword */
                x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
                x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
                x86_pop_reg (code, dreg);
                /* FIXME: need the high register 
                 * x86_pop_reg (code, dreg_high);
                 */
                /* NOTE(review): with the high-dword pop missing, ESP is left 4
                 * bytes low here, so the control-word restore below reads the
                 * wrong slot and the ADD only partially rebalances the stack
                 * for size == 8 — confirm this path is currently unused. */
        } else {
                x86_push_reg (code, X86_EAX); // SP = SP - 4
                x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
                x86_pop_reg (code, dreg);
        }
        /* Restore the original FPU control word and free the stack slot */
        x86_fldcw_membase (code, X86_ESP, 0);
        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

        /* Sign- or zero-extend sub-dword results to the full 32 bit register */
        if (size == 1)
                x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
        else if (size == 2)
                x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
        return code;
}
2612
2613 static unsigned char*
2614 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2615 {
2616         int sreg = tree->sreg1;
2617         int need_touch = FALSE;
2618
2619 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
2620         need_touch = TRUE;
2621 #endif
2622
2623         if (need_touch) {
2624                 guint8* br[5];
2625
2626                 /*
2627                  * Under Windows:
2628                  * If requested stack size is larger than one page,
2629                  * perform stack-touch operation
2630                  */
2631                 /*
2632                  * Generate stack probe code.
2633                  * Under Windows, it is necessary to allocate one page at a time,
2634                  * "touching" stack after each successful sub-allocation. This is
2635                  * because of the way stack growth is implemented - there is a
2636                  * guard page before the lowest stack page that is currently commited.
2637                  * Stack normally grows sequentially so OS traps access to the
2638                  * guard page and commits more pages when needed.
2639                  */
2640                 x86_test_reg_imm (code, sreg, ~0xFFF);
2641                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2642
2643                 br[2] = code; /* loop */
2644                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
2645                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
2646
2647                 /* 
2648                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
2649                  * that follows only initializes the last part of the area.
2650                  */
2651                 /* Same as the init code below with size==0x1000 */
2652                 if (tree->flags & MONO_INST_INIT) {
2653                         x86_push_reg (code, X86_EAX);
2654                         x86_push_reg (code, X86_ECX);
2655                         x86_push_reg (code, X86_EDI);
2656                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
2657                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
2658                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2659                         x86_cld (code);
2660                         x86_prefix (code, X86_REP_PREFIX);
2661                         x86_stosl (code);
2662                         x86_pop_reg (code, X86_EDI);
2663                         x86_pop_reg (code, X86_ECX);
2664                         x86_pop_reg (code, X86_EAX);
2665                 }
2666
2667                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2668                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2669                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2670                 x86_patch (br[3], br[2]);
2671                 x86_test_reg_reg (code, sreg, sreg);
2672                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2673                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2674
2675                 br[1] = code; x86_jump8 (code, 0);
2676
2677                 x86_patch (br[0], code);
2678                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2679                 x86_patch (br[1], code);
2680                 x86_patch (br[4], code);
2681         }
2682         else
2683                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
2684
2685         if (tree->flags & MONO_INST_INIT) {
2686                 int offset = 0;
2687                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
2688                         x86_push_reg (code, X86_EAX);
2689                         offset += 4;
2690                 }
2691                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
2692                         x86_push_reg (code, X86_ECX);
2693                         offset += 4;
2694                 }
2695                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
2696                         x86_push_reg (code, X86_EDI);
2697                         offset += 4;
2698                 }
2699                 
2700                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
2701                 if (sreg != X86_ECX)
2702                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
2703                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
2704                                 
2705                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
2706                 x86_cld (code);
2707                 x86_prefix (code, X86_REP_PREFIX);
2708                 x86_stosl (code);
2709                 
2710                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
2711                         x86_pop_reg (code, X86_EDI);
2712                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
2713                         x86_pop_reg (code, X86_ECX);
2714                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
2715                         x86_pop_reg (code, X86_EAX);
2716         }
2717         return code;
2718 }
2719
2720
2721 static guint8*
2722 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2723 {
2724         CallInfo *cinfo;
2725         int quad;
2726
2727         /* Move return value to the target register */
2728         switch (ins->opcode) {
2729         case CEE_CALL:
2730         case OP_CALL_REG:
2731         case OP_CALL_MEMBASE:
2732                 if (ins->dreg != X86_EAX)
2733                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2734                 break;
2735         case OP_VCALL:
2736         case OP_VCALL_REG:
2737         case OP_VCALL_MEMBASE:
2738                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
2739                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2740                         /* Pop the destination address from the stack */
2741                         x86_pop_reg (code, X86_ECX);
2742                         
2743                         for (quad = 0; quad < 2; quad ++) {
2744                                 switch (cinfo->ret.pair_storage [quad]) {
2745                                 case ArgInIReg:
2746                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
2747                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
2748                                         break;
2749                                 case ArgNone:
2750                                         break;
2751                                 default:
2752                                         g_assert_not_reached ();
2753                                 }
2754                         }
2755                 }
2756                 g_free (cinfo);
2757         default:
2758                 break;
2759         }
2760
2761         return code;
2762 }
2763
/*
 * emit_tls_get:
 * @code: current native code emission pointer
 * @dreg: destination register receiving the TLS value
 * @tls_offset: TLS slot key (Windows, from TlsAlloc) or segment offset (elsewhere)
 *
 * Emit code loading the thread-local value identified by TLS_OFFSET into
 * DREG. Returns the updated code pointer.
 */
static guint8*
emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
        /* 
         * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
         * Journal and/or a disassembly of the TlsGet () function.
         */
        /* Only the first 64 slots are inlined in the TEB; larger keys would
         * need the expansion-slot path which is not emitted here. */
        g_assert (tls_offset < 64);
        x86_prefix (code, X86_FS_PREFIX);
        /* fs:[0x18] is presumably the TEB self pointer (NT_TIB.Self) -- TODO confirm against winternl.h */
        x86_mov_reg_mem (code, dreg, 0x18, 4);
        /* Dunno what this does but TlsGetValue () contains it */
        x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
        /* 3600 (0xe10) looks like the TlsSlots array offset inside the TEB -- verify */
        x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
        /* Non-Windows: the value lives at a fixed offset from the GS segment base */
        x86_prefix (code, X86_GS_PREFIX);
        x86_mov_reg_mem (code, dreg, tls_offset, 4);                    
#endif
        return code;
}
2784
/*
 * REAL_PRINT_REG: debug helper macro which emits code that prints TEXT
 * followed by REG's number (%d) and current value (%p) via printf at
 * runtime. The caller-saved registers EAX/EDX/ECX are preserved around
 * the call; the three printf arguments are popped with a single ADD.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); /* save caller-saved regs clobbered by printf */ \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); /* arg 3: the register's value */ \
x86_push_imm (code, reg); /* arg 2: the register's number */ \
x86_push_imm (code, text " %d %p\n"); /* arg 1: format string */ \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); /* pop the three printf args */ \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
2799
/* Byte alignment applied to loop-start basic blocks; benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
/* A block is a loop start when marked as a loop body start and nested inside a loop */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2803
2804 void
2805 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2806 {
2807         MonoInst *ins;
2808         MonoCallInst *call;
2809         guint offset;
2810         guint8 *code = cfg->native_code + cfg->code_len;
2811         MonoInst *last_ins = NULL;
2812         guint last_offset = 0;
2813         int max_len, cpos;
2814
2815         if (cfg->opt & MONO_OPT_PEEPHOLE)
2816                 peephole_pass (cfg, bb);
2817
2818         if (cfg->opt & MONO_OPT_LOOP) {
2819                 int pad, align = LOOP_ALIGNMENT;
2820                 /* set alignment depending on cpu */
2821                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2822                         pad = align - pad;
2823                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2824                         x86_padding (code, pad);
2825                         cfg->code_len += pad;
2826                         bb->native_offset = cfg->code_len;
2827                 }
2828         }
2829
2830         if (cfg->verbose_level > 2)
2831                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2832
2833         cpos = bb->max_offset;
2834
2835         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2836                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2837                 g_assert (!cfg->compile_aot);
2838                 cpos += 6;
2839
2840                 cov->data [bb->dfn].cil_code = bb->cil_code;
2841                 /* this is not thread save, but good enough */
2842                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2843         }
2844
2845         offset = code - cfg->native_code;
2846
2847         ins = bb->code;
2848         while (ins) {
2849                 offset = code - cfg->native_code;
2850
2851                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2852
2853                 if (offset > (cfg->code_size - max_len - 16)) {
2854                         cfg->code_size *= 2;
2855                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2856                         code = cfg->native_code + offset;
2857                         mono_jit_stats.code_reallocs++;
2858                 }
2859
2860                 mono_debug_record_line_number (cfg, ins, offset);
2861
2862                 switch (ins->opcode) {
2863                 case OP_BIGMUL:
2864                         x86_mul_reg (code, ins->sreg2, TRUE);
2865                         break;
2866                 case OP_BIGMUL_UN:
2867                         x86_mul_reg (code, ins->sreg2, FALSE);
2868                         break;
2869                 case OP_X86_SETEQ_MEMBASE:
2870                 case OP_X86_SETNE_MEMBASE:
2871                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2872                                          ins->inst_basereg, ins->inst_offset, TRUE);
2873                         break;
2874                 case OP_STOREI1_MEMBASE_IMM:
2875                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2876                         break;
2877                 case OP_STOREI2_MEMBASE_IMM:
2878                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2879                         break;
2880                 case OP_STORE_MEMBASE_IMM:
2881                 case OP_STOREI4_MEMBASE_IMM:
2882                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2883                         break;
2884                 case OP_STOREI1_MEMBASE_REG:
2885                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2886                         break;
2887                 case OP_STOREI2_MEMBASE_REG:
2888                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2889                         break;
2890                 case OP_STORE_MEMBASE_REG:
2891                 case OP_STOREI4_MEMBASE_REG:
2892                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2893                         break;
2894                 case CEE_LDIND_I:
2895                 case CEE_LDIND_I4:
2896                 case CEE_LDIND_U4:
2897                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2898                         break;
2899                 case OP_LOADU4_MEM:
2900                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2901                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2902                         break;
2903                 case OP_LOAD_MEMBASE:
2904                 case OP_LOADI4_MEMBASE:
2905                 case OP_LOADU4_MEMBASE:
2906                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2907                         break;
2908                 case OP_LOADU1_MEMBASE:
2909                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2910                         break;
2911                 case OP_LOADI1_MEMBASE:
2912                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2913                         break;
2914                 case OP_LOADU2_MEMBASE:
2915                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2916                         break;
2917                 case OP_LOADI2_MEMBASE:
2918                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2919                         break;
2920                 case CEE_CONV_I1:
2921                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2922                         break;
2923                 case CEE_CONV_I2:
2924                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2925                         break;
2926                 case CEE_CONV_U1:
2927                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2928                         break;
2929                 case CEE_CONV_U2:
2930                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2931                         break;
2932                 case OP_COMPARE:
2933                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2934                         break;
2935                 case OP_COMPARE_IMM:
2936                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2937                         break;
2938                 case OP_X86_COMPARE_MEMBASE_REG:
2939                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2940                         break;
2941                 case OP_X86_COMPARE_MEMBASE_IMM:
2942                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2943                         break;
2944                 case OP_X86_COMPARE_MEMBASE8_IMM:
2945                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2946                         break;
2947                 case OP_X86_COMPARE_REG_MEMBASE:
2948                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2949                         break;
2950                 case OP_X86_COMPARE_MEM_IMM:
2951                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2952                         break;
2953                 case OP_X86_TEST_NULL:
2954                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2955                         break;
2956                 case OP_X86_ADD_MEMBASE_IMM:
2957                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2958                         break;
2959                 case OP_X86_ADD_MEMBASE:
2960                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2961                         break;
2962                 case OP_X86_SUB_MEMBASE_IMM:
2963                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2964                         break;
2965                 case OP_X86_SUB_MEMBASE:
2966                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2967                         break;
2968                 case OP_X86_INC_MEMBASE:
2969                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2970                         break;
2971                 case OP_X86_INC_REG:
2972                         x86_inc_reg (code, ins->dreg);
2973                         break;
2974                 case OP_X86_DEC_MEMBASE:
2975                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2976                         break;
2977                 case OP_X86_DEC_REG:
2978                         x86_dec_reg (code, ins->dreg);
2979                         break;
2980                 case OP_X86_MUL_MEMBASE:
2981                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2982                         break;
2983                 case CEE_BREAK:
2984                         x86_breakpoint (code);
2985                         break;
2986                 case OP_ADDCC:
2987                 case CEE_ADD:
2988                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2989                         break;
2990                 case OP_ADC:
2991                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2992                         break;
2993                 case OP_ADDCC_IMM:
2994                 case OP_ADD_IMM:
2995                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2996                         break;
2997                 case OP_ADC_IMM:
2998                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2999                         break;
3000                 case OP_SUBCC:
3001                 case CEE_SUB:
3002                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
3003                         break;
3004                 case OP_SBB:
3005                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
3006                         break;
3007                 case OP_SUBCC_IMM:
3008                 case OP_SUB_IMM:
3009                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
3010                         break;
3011                 case OP_SBB_IMM:
3012                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
3013                         break;
3014                 case CEE_AND:
3015                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
3016                         break;
3017                 case OP_AND_IMM:
3018                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
3019                         break;
3020                 case CEE_DIV:
3021                         x86_cdq (code);
3022                         x86_div_reg (code, ins->sreg2, TRUE);
3023                         break;
3024                 case CEE_DIV_UN:
3025                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
3026                         x86_div_reg (code, ins->sreg2, FALSE);
3027                         break;
3028                 case OP_DIV_IMM:
3029                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3030                         x86_cdq (code);
3031                         x86_div_reg (code, ins->sreg2, TRUE);
3032                         break;
3033                 case CEE_REM:
3034                         x86_cdq (code);
3035                         x86_div_reg (code, ins->sreg2, TRUE);
3036                         break;
3037                 case CEE_REM_UN:
3038                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
3039                         x86_div_reg (code, ins->sreg2, FALSE);
3040                         break;
3041                 case OP_REM_IMM:
3042                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3043                         x86_cdq (code);
3044                         x86_div_reg (code, ins->sreg2, TRUE);
3045                         break;
3046                 case CEE_OR:
3047                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
3048                         break;
3049                 case OP_OR_IMM:
3050                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
3051                         break;
3052                 case CEE_XOR:
3053                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
3054                         break;
3055                 case OP_XOR_IMM:
3056                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
3057                         break;
3058                 case CEE_SHL:
3059                         g_assert (ins->sreg2 == X86_ECX);
3060                         x86_shift_reg (code, X86_SHL, ins->dreg);
3061                         break;
3062                 case CEE_SHR:
3063                         g_assert (ins->sreg2 == X86_ECX);
3064                         x86_shift_reg (code, X86_SAR, ins->dreg);
3065                         break;
3066                 case OP_SHR_IMM:
3067                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
3068                         break;
3069                 case OP_SHR_UN_IMM:
3070                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
3071                         break;
3072                 case CEE_SHR_UN:
3073                         g_assert (ins->sreg2 == X86_ECX);
3074                         x86_shift_reg (code, X86_SHR, ins->dreg);
3075                         break;
3076                 case OP_SHL_IMM:
3077                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
3078                         break;
3079                 case OP_LSHL: {
3080                         guint8 *jump_to_end;
3081
3082                         /* handle shifts below 32 bits */
3083                         x86_shld_reg (code, ins->unused, ins->sreg1);
3084                         x86_shift_reg (code, X86_SHL, ins->sreg1);
3085
3086                         x86_test_reg_imm (code, X86_ECX, 32);
3087                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3088
3089                         /* handle shift over 32 bit */
3090                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3091                         x86_clear_reg (code, ins->sreg1);
3092                         
3093                         x86_patch (jump_to_end, code);
3094                         }
3095                         break;
3096                 case OP_LSHR: {
3097                         guint8 *jump_to_end;
3098
3099                         /* handle shifts below 32 bits */
3100                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3101                         x86_shift_reg (code, X86_SAR, ins->unused);
3102
3103                         x86_test_reg_imm (code, X86_ECX, 32);
3104                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3105
3106                         /* handle shifts over 31 bits */
3107                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3108                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
3109                         
3110                         x86_patch (jump_to_end, code);
3111                         }
3112                         break;
3113                 case OP_LSHR_UN: {
3114                         guint8 *jump_to_end;
3115
3116                         /* handle shifts below 32 bits */
3117                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3118                         x86_shift_reg (code, X86_SHR, ins->unused);
3119
3120                         x86_test_reg_imm (code, X86_ECX, 32);
3121                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3122
3123                         /* handle shifts over 31 bits */
3124                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3125                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
3126                         
3127                         x86_patch (jump_to_end, code);
3128                         }
3129                         break;
3130                 case OP_LSHL_IMM:
3131                         if (ins->inst_imm >= 32) {
3132                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3133                                 x86_clear_reg (code, ins->sreg1);
3134                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
3135                         } else {
3136                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
3137                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
3138                         }
3139                         break;
3140                 case OP_LSHR_IMM:
3141                         if (ins->inst_imm >= 32) {
3142                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
3143                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
3144                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
3145                         } else {
3146                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3147                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
3148                         }
3149                         break;
3150                 case OP_LSHR_UN_IMM:
3151                         if (ins->inst_imm >= 32) {
3152                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3153                                 x86_clear_reg (code, ins->unused);
3154                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
3155                         } else {
3156                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3157                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
3158                         }
3159                         break;
3160                 case CEE_NOT:
3161                         x86_not_reg (code, ins->sreg1);
3162                         break;
3163                 case CEE_NEG:
3164                         x86_neg_reg (code, ins->sreg1);
3165                         break;
3166                 case OP_SEXT_I1:
3167                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3168                         break;
3169                 case OP_SEXT_I2:
3170                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3171                         break;
3172                 case CEE_MUL:
3173                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3174                         break;
3175                 case OP_MUL_IMM:
3176                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
3177                         break;
3178                 case CEE_MUL_OVF:
3179                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3180                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3181                         break;
3182                 case CEE_MUL_OVF_UN: {
3183                         /* the mul operation and the exception check should most likely be split */
3184                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3185                         /*g_assert (ins->sreg2 == X86_EAX);
3186                         g_assert (ins->dreg == X86_EAX);*/
3187                         if (ins->sreg2 == X86_EAX) {
3188                                 non_eax_reg = ins->sreg1;
3189                         } else if (ins->sreg1 == X86_EAX) {
3190                                 non_eax_reg = ins->sreg2;
3191                         } else {
3192                                 /* no need to save since we're going to store to it anyway */
3193                                 if (ins->dreg != X86_EAX) {
3194                                         saved_eax = TRUE;
3195                                         x86_push_reg (code, X86_EAX);
3196                                 }
3197                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
3198                                 non_eax_reg = ins->sreg2;
3199                         }
3200                         if (ins->dreg == X86_EDX) {
3201                                 if (!saved_eax) {
3202                                         saved_eax = TRUE;
3203                                         x86_push_reg (code, X86_EAX);
3204                                 }
3205                         } else if (ins->dreg != X86_EAX) {
3206                                 saved_edx = TRUE;
3207                                 x86_push_reg (code, X86_EDX);
3208                         }
3209                         x86_mul_reg (code, non_eax_reg, FALSE);
3210                         /* save before the check since pop and mov don't change the flags */
3211                         if (ins->dreg != X86_EAX)
3212                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3213                         if (saved_edx)
3214                                 x86_pop_reg (code, X86_EDX);
3215                         if (saved_eax)
3216                                 x86_pop_reg (code, X86_EAX);
3217                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3218                         break;
3219                 }
3220                 case OP_ICONST:
3221                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
3222                         break;
3223                 case OP_AOTCONST:
3224                         g_assert_not_reached ();
3225                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
3226                         x86_mov_reg_imm (code, ins->dreg, 0);
3227                         break;
3228                 case OP_LOAD_GOTADDR:
3229                         x86_call_imm (code, 0);
3230                         /* 
3231                          * The patch needs to point to the pop, since the GOT offset needs 
3232                          * to be added to that address.
3233                          */
3234                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3235                         x86_pop_reg (code, ins->dreg);
3236                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
3237                         break;
3238                 case OP_GOT_ENTRY:
3239                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3240                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
3241                         break;
3242                 case OP_X86_PUSH_GOT_ENTRY:
3243                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3244                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
3245                         break;
3246                 case CEE_CONV_I4:
3247                 case OP_MOVE:
3248                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3249                         break;
3250                 case CEE_CONV_U4:
3251                         g_assert_not_reached ();
3252                 case CEE_JMP: {
3253                         /*
3254                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3255                          * Keep in sync with the code in emit_epilog.
3256                          */
3257                         int pos = 0;
3258
3259                         /* FIXME: no tracing support... */
3260                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3261                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3262                         /* reset offset to make max_len work */
3263                         offset = code - cfg->native_code;
3264
3265                         g_assert (!cfg->method->save_lmf);
3266
3267                         if (cfg->used_int_regs & (1 << X86_EBX))
3268                                 pos -= 4;
3269                         if (cfg->used_int_regs & (1 << X86_EDI))
3270                                 pos -= 4;
3271                         if (cfg->used_int_regs & (1 << X86_ESI))
3272                                 pos -= 4;
3273                         if (pos)
3274                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3275         
3276                         if (cfg->used_int_regs & (1 << X86_ESI))
3277                                 x86_pop_reg (code, X86_ESI);
3278                         if (cfg->used_int_regs & (1 << X86_EDI))
3279                                 x86_pop_reg (code, X86_EDI);
3280                         if (cfg->used_int_regs & (1 << X86_EBX))
3281                                 x86_pop_reg (code, X86_EBX);
3282         
3283                         /* restore ESP/EBP */
3284                         x86_leave (code);
3285                         offset = code - cfg->native_code;
3286                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3287                         x86_jump32 (code, 0);
3288                         break;
3289                 }
3290                 case OP_CHECK_THIS:
3291                         /* ensure ins->sreg1 is not NULL
3292                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
3293                          * cmp DWORD PTR [eax], 0
3294                          */
3295                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
3296                         break;
3297                 case OP_ARGLIST: {
                             /* Store the address of the signature cookie (located at
                              * EBP + cfg->sig_cookie) into the 4-byte slot pointed to by
                              * sreg1. A scratch register distinct from sreg1 is borrowed
                              * (saved/restored via push/pop) for the address computation. */
3298                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
3299                         x86_push_reg (code, hreg);
3300                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
3301                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
3302                         x86_pop_reg (code, hreg);
3303                         break;
3304                 }
3305                 case OP_FCALL:
3306                 case OP_LCALL:
3307                 case OP_VCALL:
3308                 case OP_VOIDCALL:
3309                 case CEE_CALL:
                             /* Direct call: emit a patched call to either the method or an
                              * absolute address, then clean up pushed arguments unless the
                              * callee uses stdcall (callee-cleans) convention. */
3310                         call = (MonoCallInst*)ins;
3311                         if (ins->flags & MONO_INST_HAS_METHOD)
3312                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
3313                         else
3314                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
3315                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3316                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
3317                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
3318                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
3319                                  * smart enough to do that optimization yet
3320                                  *
3321                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
3322                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
3323                                  * speedup (most likely from locality benefits). People with other processors
3324                                  * should check on theirs to see what happens.
3325                                  */
3326                                 if (call->stack_usage == 4) {
3327                                         /* we want to use registers that won't get used soon, so use
3328                                          * ecx, as eax will get allocated first. edx is used by long calls,
3329                                          * so we can't use that.
3330                                          */
3331                                         
3332                                         x86_pop_reg (code, X86_ECX);
3333                                 } else {
3334                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3335                                 }
3336                         }
3337                         code = emit_move_return_value (cfg, ins, code);
3338                         break;
3339                 case OP_FCALL_REG:
3340                 case OP_LCALL_REG:
3341                 case OP_VCALL_REG:
3342                 case OP_VOIDCALL_REG:
3343                 case OP_CALL_REG:
                             /* Indirect call through a register; caller cleans the argument
                              * area unless the signature is stdcall (1 pop for a 4-byte
                              * cleanup, add esp,imm otherwise). */
3344                         call = (MonoCallInst*)ins;
3345                         x86_call_reg (code, ins->sreg1);
3346                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3347                                 if (call->stack_usage == 4)
3348                                         x86_pop_reg (code, X86_ECX);
3349                                 else
3350                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3351                         }
3352                         code = emit_move_return_value (cfg, ins, code);
3353                         break;
3354                 case OP_FCALL_MEMBASE:
3355                 case OP_LCALL_MEMBASE:
3356                 case OP_VCALL_MEMBASE:
3357                 case OP_VOIDCALL_MEMBASE:
3358                 case OP_CALL_MEMBASE:
                             /* Indirect call through memory ([sreg1 + inst_offset], e.g. a
                              * vtable slot); identical stack-cleanup policy. */
3359                         call = (MonoCallInst*)ins;
3360                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
3361                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3362                                 if (call->stack_usage == 4)
3363                                         x86_pop_reg (code, X86_ECX);
3364                                 else
3365                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3366                         }
3367                         code = emit_move_return_value (cfg, ins, code);
3368                         break;
3369                 case OP_OUTARG:
3370                 case OP_X86_PUSH:
3371                         x86_push_reg (code, ins->sreg1);
3372                         break;
3373                 case OP_X86_PUSH_IMM:
3374                         x86_push_imm (code, ins->inst_imm);
3375                         break;
3376                 case OP_X86_PUSH_MEMBASE:
3377                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
3378                         break;
3379                 case OP_X86_PUSH_OBJ: 
                             /* Push a value type of inst_imm bytes: reserve the space on the
                              * stack, then rep movsd inst_imm/4 dwords from
                              * [inst_basereg + inst_offset] into the reserved area.
                              * EDI/ESI/ECX are clobbered by the string move, so they are
                              * saved around it; the +12 in the lea skips those three saved
                              * registers to reach the reserved destination. */
3380                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
3381                         x86_push_reg (code, X86_EDI);
3382                         x86_push_reg (code, X86_ESI);
3383                         x86_push_reg (code, X86_ECX);
3384                         if (ins->inst_offset)
3385                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
3386                         else
3387                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
3388                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
3389                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
3390                         x86_cld (code);
3391                         x86_prefix (code, X86_REP_PREFIX);
3392                         x86_movsd (code);
3393                         x86_pop_reg (code, X86_ECX);
3394                         x86_pop_reg (code, X86_ESI);
3395                         x86_pop_reg (code, X86_EDI);
3396                         break;
3397                 case OP_X86_LEA:
3398                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
3399                         break;
3400                 case OP_X86_LEA_MEMBASE:
3401                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
3402                         break;
3403                 case OP_X86_XCHG:
3404                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
3405                         break;
3406                 case OP_LOCALLOC:
                             /* Round the requested size up to the frame alignment before
                              * growing the stack; the result is the new ESP. */
3407                         /* keep alignment */
3408                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
3409                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
3410                         code = mono_emit_stack_alloc (code, ins);
3411                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
3412                         break;
3413                 case CEE_RET:
3414                         x86_ret (code);
3415                         break;
3416                 case CEE_THROW: {
                             /* Exception object is passed on the stack to the runtime
                              * throw helper; the helper does not return. */
3417                         x86_push_reg (code, ins->sreg1);
3418                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3419                                                           (gpointer)"mono_arch_throw_exception");
3420                         break;
3421                 }
3422                 case OP_RETHROW: {
3423                         x86_push_reg (code, ins->sreg1);
3424                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3425                                                           (gpointer)"mono_arch_rethrow_exception");
3426                         break;
3427                 }
3428                 case OP_CALL_HANDLER: 
                             /* Call into a finally/filter handler block; the target is
                              * resolved later through the basic-block patch. */
3429                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3430                         x86_call_imm (code, 0);
3431                         break;
3432                 case OP_LABEL:
                             /* Record the current native offset so earlier branches to this
                              * label can be patched. */
3433                         ins->inst_c0 = code - cfg->native_code;
3434                         break;
3435                 case CEE_BR:
                             /* Unconditional branch. If the target's native offset is
                              * already known, jump straight to it; otherwise record a patch
                              * and emit a short (8-bit) jump when the estimated displacement
                              * fits and branch optimization is enabled, else a 32-bit jump. */
3436                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
3437                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
3438                         //break;
3439                         if (ins->flags & MONO_INST_BRLABEL) {
3440                                 if (ins->inst_i0->inst_c0) {
3441                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
3442                                 } else {
3443                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
3444                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3445                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
3446                                                 x86_jump8 (code, 0);
3447                                         else 
3448                                                 x86_jump32 (code, 0);
3449                                 }
3450                         } else {
3451                                 if (ins->inst_target_bb->native_offset) {
3452                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
3453                                 } else {
3454                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3455                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3456                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3457                                                 x86_jump8 (code, 0);
3458                                         else 
3459                                                 x86_jump32 (code, 0);
3460                                 } 
3461                         }
3462                         break;
3463                 case OP_BR_REG:
3464                         x86_jump_reg (code, ins->sreg1);
3465                         break;
                         /* Compare-and-set opcodes: materialize a 0/1 boolean from the
                          * flags of a preceding compare. setcc writes only the low byte,
                          * so the result is zero-extended with a widen. The _UN variants
                          * pass is_signed=FALSE so unsigned condition codes are used. */
3466                 case OP_CEQ:
3467                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3468                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3469                         break;
3470                 case OP_CLT:
3471                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
3472                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3473                         break;
3474                 case OP_CLT_UN:
3475                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3476                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3477                         break;
3478                 case OP_CGT:
3479                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
3480                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3481                         break;
3482                 case OP_CGT_UN:
3483                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3484                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3485                         break;
3486                 case OP_CNE:
3487                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
3488                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3489                         break;
                         /* Conditional-exception opcodes: branch to an exception throw
                          * when the condition (from a preceding compare) holds. The
                          * condition code is looked up in branch_cc_table; opcodes below
                          * OP_COND_EXC_NE_UN are the signed variants. */
3490                 case OP_COND_EXC_EQ:
3491                 case OP_COND_EXC_NE_UN:
3492                 case OP_COND_EXC_LT:
3493                 case OP_COND_EXC_LT_UN:
3494                 case OP_COND_EXC_GT:
3495                 case OP_COND_EXC_GT_UN:
3496                 case OP_COND_EXC_GE:
3497                 case OP_COND_EXC_GE_UN:
3498                 case OP_COND_EXC_LE:
3499                 case OP_COND_EXC_LE_UN:
3500                 case OP_COND_EXC_OV:
3501                 case OP_COND_EXC_NO:
3502                 case OP_COND_EXC_C:
3503                 case OP_COND_EXC_NC:
3504                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
3505                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3506                         break;
                         /* Conditional branches: same table lookup; opcodes below
                          * CEE_BNE_UN are the signed variants. */
3507                 case CEE_BEQ:
3508                 case CEE_BNE_UN:
3509                 case CEE_BLT:
3510                 case CEE_BLT_UN:
3511                 case CEE_BGT:
3512                 case CEE_BGT_UN:
3513                 case CEE_BGE:
3514                 case CEE_BGE_UN:
3515                 case CEE_BLE:
3516                 case CEE_BLE_UN:
3517                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
3518                         break;
3519
3520                 /* floating point opcodes */
3521                 case OP_R8CONST: {
                             /* Load a double constant onto the FP stack. +0.0 and 1.0 use
                              * the dedicated fldz/fld1 instructions (the signbit check
                              * keeps -0.0 out of the fldz fast path). Under AOT the bits
                              * are pushed as immediates and loaded from the stack, since
                              * no absolute data address can be embedded; otherwise an
                              * fld from a patched absolute address is emitted. */
3522                         double d = *(double *)ins->inst_p0;
3523
3524                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
3525                                 x86_fldz (code);
3526                         } else if (d == 1.0) {
3527                                 x86_fld1 (code);
3528                         } else {
3529                                 if (cfg->compile_aot) {
3530                                         guint32 *val = (guint32*)&d;
3531                                         x86_push_imm (code, val [1]);
3532                                         x86_push_imm (code, val [0]);
3533                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
3534                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3535                                 }
3536                                 else {
3537                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
3538                                         x86_fld (code, NULL, TRUE);
3539                                 }
3540                         }
3541                         break;
3542                 }
3543                 case OP_R4CONST: {
                             /* Same scheme as OP_R8CONST, for a 4-byte float constant. */
3544                         float f = *(float *)ins->inst_p0;
3545
3546                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
3547                                 x86_fldz (code);
3548                         } else if (f == 1.0) {
3549                                 x86_fld1 (code);
3550                         } else {
3551                                 if (cfg->compile_aot) {
3552                                         guint32 val = *(guint32*)&f;
3553                                         x86_push_imm (code, val);
3554                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
3555                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3556                                 }
3557                                 else {
3558                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
3559                                         x86_fld (code, NULL, FALSE);
3560                                 }
3561                         }
3562                         break;
3563                 }
                         /* FP load/store: the last boolean of x86_fst/x86_fld selects
                          * double (TRUE) vs float (FALSE) memory operand size. */
3564                 case OP_STORER8_MEMBASE_REG:
3565                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3566                         break;
3567                 case OP_LOADR8_SPILL_MEMBASE:
                             /* Reload a spilled double and swap it under the current
                              * top-of-stack to restore the expected FP stack order. */
3568                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3569                         x86_fxch (code, 1);
3570                         break;
3571                 case OP_LOADR8_MEMBASE:
3572                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3573                         break;
3574                 case OP_STORER4_MEMBASE_REG:
3575                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3576                         break;
3577                 case OP_LOADR4_MEMBASE:
3578                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3579                         break;
3580                 case CEE_CONV_R4: /* FIXME: change precision */
3581                 case CEE_CONV_R8:
                             /* int -> float: push the register and fild it from memory,
                              * since x87 has no reg-to-FP move. */
3582                         x86_push_reg (code, ins->sreg1);
3583                         x86_fild_membase (code, X86_ESP, 0, FALSE);
3584                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3585                         break;
3586                 case OP_X86_FP_LOAD_I8:
3587                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3588                         break;
3589                 case OP_X86_FP_LOAD_I4:
3590                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3591                         break;
                         /* float -> integer conversions of various widths; the helper
                          * emits the truncating conversion and sign/zero extension. */
3592                 case OP_FCONV_TO_I1:
3593                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3594                         break;
3595                 case OP_FCONV_TO_U1:
3596                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3597                         break;
3598                 case OP_FCONV_TO_I2:
3599                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3600                         break;
3601                 case OP_FCONV_TO_U2:
3602                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3603                         break;
3604                 case OP_FCONV_TO_I4:
3605                 case OP_FCONV_TO_I:
3606                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3607                         break;
3608                 case OP_FCONV_TO_I8:
                             /* float -> int64: save the FPU control word on the stack,
                              * OR in 0xc00 (rounding control = truncate toward zero as
                              * CIL conv requires), fist the 64-bit result, pop it into
                              * the dreg/unused register pair, then restore the original
                              * control word. */
3609                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3610                         x86_fnstcw_membase(code, X86_ESP, 0);
3611                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3612                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3613                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3614                         x86_fldcw_membase (code, X86_ESP, 2);
3615                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3616                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3617                         x86_pop_reg (code, ins->dreg);
3618                         x86_pop_reg (code, ins->unused);
3619                         x86_fldcw_membase (code, X86_ESP, 0);
3620                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3621                         break;
3622                 case OP_LCONV_TO_R_UN: { 
                             /* unsigned int64 -> float. fild only understands signed
                              * 64-bit integers, so when the top bit is set the loaded
                              * value is 2^64 too small; mn is the 80-bit little-endian
                              * encoding of 2^64 (exponent 0x403f, significand
                              * 0x8000000000000000) used to correct it. */
3623                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
3624                         guint8 *br;
3625
3626                         /* load 64bit integer to FP stack */
3627                         x86_push_imm (code, 0);
3628                         x86_push_reg (code, ins->sreg2);
3629                         x86_push_reg (code, ins->sreg1);
3630                         x86_fild_membase (code, X86_ESP, 0, TRUE);
3631                         /* store as 80bit FP value */
3632                         x86_fst80_membase (code, X86_ESP, 0);
3633                         
3634                         /* test if lreg is negative */
3635                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3636                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3637         
3638                         /* add correction constant mn */
3639                         x86_fld80_mem (code, mn);
3640                         x86_fld80_membase (code, X86_ESP, 0);
3641                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3642                         x86_fst80_membase (code, X86_ESP, 0);
3643
3644                         x86_patch (br, code);
3645
3646                         x86_fld80_membase (code, X86_ESP, 0);
3647                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
3648
3649                         break;
3650                 }
3651                 case OP_LCONV_TO_OVF_I: {
                             /* int64 -> int32 with overflow check: accept only values
                              * whose high word is the sign extension of the low word,
                              * otherwise throw OverflowException. */
3652                         guint8 *br [3], *label [1];
3653
3654                         /* 
3655                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
3656                          */
3657                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3658
3659                         /* If the low word top bit is set, see if we are negative */
3660                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3661                         /* We are not negative (no top bit set, check for our top word to be zero */
3662                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3663                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3664                         label [0] = code;
3665
3666                         /* throw exception */
3667                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3668                         x86_jump32 (code, 0);
3669         
3670                         x86_patch (br [0], code);
3671                         /* our top bit is set, check that top word is 0xffffffff */
3672                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3673                 
3674                         x86_patch (br [1], code);
3675                         /* nope, emit exception */
3676                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3677                         x86_patch (br [2], label [0]);
3678
3679                         if (ins->dreg != ins->sreg1)
3680                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3681                         break;
3682                 }
                         /* FP arithmetic on the x87 stack: operate on ST(0)/ST(1) and
                          * pop, leaving the result on top. */
3683                 case OP_FADD:
3684                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3685                         break;
3686                 case OP_FSUB:
3687                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3688                         break;          
3689                 case OP_FMUL:
3690                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3691                         break;          
3692                 case OP_FDIV:
3693                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3694                         break;          
3695                 case OP_FNEG:
3696                         x86_fchs (code);
3697                         break;          
3698                 case OP_SIN:
                             /* The trailing fldz + fadd appears to normalize the result
                              * (e.g. forcing -0.0/denormal handling) — same pattern is
                              * used after fcos/fpatan below. */
3699                         x86_fsin (code);
3700                         x86_fldz (code);
3701                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3702                         break;          
3703                 case OP_COS:
3704                         x86_fcos (code);
3705                         x86_fldz (code);
3706                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3707                         break;          
3708                 case OP_ABS:
3709                         x86_fabs (code);
3710                         break;          
3711                 case OP_TAN: {
3712                         /* 
3713                          * it really doesn't make sense to inline all this code,
3714                          * it's here just to show that things may not be as simple 
3715                          * as they appear.
3716                          */
                             /* fptan only works for |arg| < 2^63 and sets the C2 status
                              * flag when the operand is out of range; in that case the
                              * argument is reduced with fprem1 against 2*pi-derived
                              * values before retrying. EAX is clobbered by fnstsw, so it
                              * is saved around the whole sequence. */
3717                         guchar *check_pos, *end_tan, *pop_jump;
3718                         x86_push_reg (code, X86_EAX);
3719                         x86_fptan (code);
3720                         x86_fnstsw (code);
3721                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3722                         check_pos = code;
3723                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3724                         x86_fstp (code, 0); /* pop the 1.0 */
3725                         end_tan = code;
3726                         x86_jump8 (code, 0);
3727                         x86_fldpi (code);
3728                         x86_fp_op (code, X86_FADD, 0);
3729                         x86_fxch (code, 1);
3730                         x86_fprem1 (code);
3731                         x86_fstsw (code);
3732                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3733                         pop_jump = code;
3734                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3735                         x86_fstp (code, 1);
3736                         x86_fptan (code);
3737                         x86_patch (pop_jump, code);
3738                         x86_fstp (code, 0); /* pop the 1.0 */
3739                         x86_patch (check_pos, code);
3740                         x86_patch (end_tan, code);
3741                         x86_fldz (code);
3742                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3743                         x86_pop_reg (code, X86_EAX);
3744                         break;
3745                 }
3746                 case OP_ATAN:
                             /* fpatan computes atan(ST(1)/ST(0)); with 1.0 pushed on top
                              * this yields atan of the original operand. */
3747                         x86_fld1 (code);
3748                         x86_fpatan (code);
3749                         x86_fldz (code);
3750                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3751                         break;          
3752                 case OP_SQRT:
3753                         x86_fsqrt (code);
3754                         break;          
3755                 case OP_X86_FPOP:
                             /* Discard the FP top-of-stack. */
3756                         x86_fstp (code, 0);
3757                         break;          
3758                 case OP_FREM: {
3759                         guint8 *l1, *l2;
3760
3761                         x86_push_reg (code, X86_EAX);
3762                         /* we need to exchange ST(0) with ST(1) */
3763                         x86_fxch (code, 1);
3764
3765                         /* this requires a loop, because fprem sometimes 
3766                          * returns a partial remainder */
3767                         l1 = code;
3768                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3769                         /* x86_fprem1 (code); */
3770                         x86_fprem (code);
3771                         x86_fnstsw (code);
                             /* C2 set in the FPU status word means the reduction is
                              * incomplete; loop back until it is clear. l2 points past the
                              * 2-byte branch so the backward displacement is computed from
                              * the end of the instruction. */
3772                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3773                         l2 = code + 2;
3774                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3775
3776                         /* pop result */
3777                         x86_fstp (code, 1);
3778
3779                         x86_pop_reg (code, X86_EAX);
3780                         break;
3781                 }
3782                 case OP_FCOMPARE:
                             /* With FCMOV support, fcomip sets EFLAGS directly from the
                              * FP compare; otherwise fall back to the status-word compare
                              * which transfers the FPU condition codes through EAX. */
3783                         if (cfg->opt & MONO_OPT_FCMOV) {
3784                                 x86_fcomip (code, 1);
3785                                 x86_fstp (code, 0);
3786                                 break;
3787                         }
3788                         /* this overwrites EAX */
3789                         EMIT_FPCOMPARE(code);
3790                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3791                         break;
3792                 case OP_FCEQ:
                             /* FP equality -> 0/1 in dreg. The FCMOV path must treat an
                              * unordered compare (NaN; parity flag set) as "not equal",
                              * hence the jump over the sete. */
3793                         if (cfg->opt & MONO_OPT_FCMOV) {
3794                                 /* zeroing the register at the start results in 
3795                                  * shorter and faster code (we can also remove the widening op)
3796                                  */
3797                                 guchar *unordered_check;
3798                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3799                                 x86_fcomip (code, 1);
3800                                 x86_fstp (code, 0);
3801                                 unordered_check = code;
3802                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3803                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3804                                 x86_patch (unordered_check, code);
3805                                 break;
3806                         }
                             /* Fallback path: EAX is clobbered by the status-word
                              * compare, so preserve it when it is not the destination. */
3807                         if (ins->dreg != X86_EAX) 
3808                                 x86_push_reg (code, X86_EAX);
3809
3810                         EMIT_FPCOMPARE(code);
3811                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3812                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3813                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3814                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3815
3816                         if (ins->dreg != X86_EAX) 
3817                                 x86_pop_reg (code, X86_EAX);
3818                         break;
3819                 case OP_FCLT:
3820                 case OP_FCLT_UN:
                             /* FP less-than -> 0/1 in dreg. The _UN variant must yield 1
                              * on an unordered compare (NaN), so the FCMOV path jumps to
                              * an inc on parity, and the fallback path re-checks the raw
                              * condition-code mask. */
3821                         if (cfg->opt & MONO_OPT_FCMOV) {
3822                                 /* zeroing the register at the start results in 
3823                                  * shorter and faster code (we can also remove the widening op)
3824                                  */
3825                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3826                                 x86_fcomip (code, 1);
3827                                 x86_fstp (code, 0);
3828                                 if (ins->opcode == OP_FCLT_UN) {
3829                                         guchar *unordered_check = code;
3830                                         guchar *jump_to_end;
3831                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3832                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3833                                         jump_to_end = code;
3834                                         x86_jump8 (code, 0);
3835                                         x86_patch (unordered_check, code);
3836                                         x86_inc_reg (code, ins->dreg);
3837                                         x86_patch (jump_to_end, code);
3838                                 } else {
3839                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3840                                 }
3841                                 break;
3842                         }
                             /* Fallback path: EAX is clobbered by the status-word
                              * compare, so preserve it when it is not the destination. */
3843                         if (ins->dreg != X86_EAX) 
3844                                 x86_push_reg (code, X86_EAX);
3845
3846                         EMIT_FPCOMPARE(code);
3847                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3848                         if (ins->opcode == OP_FCLT_UN) {
3849                                 guchar *is_not_zero_check, *end_jump;
3850                                 is_not_zero_check = code;
3851                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3852                                 end_jump = code;
3853                                 x86_jump8 (code, 0);
3854                                 x86_patch (is_not_zero_check, code);
3855                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3856
3857                                 x86_patch (end_jump, code);
3858                         }
3859                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3860                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3861
3862                         if (ins->dreg != X86_EAX) 
3863                                 x86_pop_reg (code, X86_EAX);
3864                         break;
3865                 case OP_FCGT:
3866                 case OP_FCGT_UN:
3867                         if (cfg->opt & MONO_OPT_FCMOV) {
3868                                 /* zeroing the register at the start results in 
3869                                  * shorter and faster code (we can also remove the widening op)
3870                                  */
3871                                 guchar *unordered_check;
3872                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3873                                 x86_fcomip (code, 1);
3874                                 x86_fstp (code, 0);
3875                                 if (ins->opcode == OP_FCGT) {
3876                                         unordered_check = code;
3877                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3878                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3879                                         x86_patch (unordered_check, code);
3880                                 } else {
3881                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3882                                 }
3883                                 break;
3884                         }
3885                         if (ins->dreg != X86_EAX) 
3886                                 x86_push_reg (code, X86_EAX);
3887
3888                         EMIT_FPCOMPARE(code);
3889                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3890                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3891                         if (ins->opcode == OP_FCGT_UN) {
3892                                 guchar *is_not_zero_check, *end_jump;
3893                                 is_not_zero_check = code;
3894                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3895                                 end_jump = code;
3896                                 x86_jump8 (code, 0);
3897                                 x86_patch (is_not_zero_check, code);
3898                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3899         
3900                                 x86_patch (end_jump, code);
3901                         }
3902                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3903                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3904
3905                         if (ins->dreg != X86_EAX) 
3906                                 x86_pop_reg (code, X86_EAX);
3907                         break;
3908                 case OP_FBEQ:
3909                         if (cfg->opt & MONO_OPT_FCMOV) {
3910                                 guchar *jump = code;
3911                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3912                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3913                                 x86_patch (jump, code);
3914                                 break;
3915                         }
3916                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3917                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3918                         break;
3919                 case OP_FBNE_UN:
3920                         /* Branch if C013 != 100 */
3921                         if (cfg->opt & MONO_OPT_FCMOV) {
3922                                 /* branch if !ZF or (PF|CF) */
3923                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3924                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3925                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3926                                 break;
3927                         }
3928                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3929                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3930                         break;
3931                 case OP_FBLT:
3932                         if (cfg->opt & MONO_OPT_FCMOV) {
3933                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3934                                 break;
3935                         }
3936                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3937                         break;
3938                 case OP_FBLT_UN:
3939                         if (cfg->opt & MONO_OPT_FCMOV) {
3940                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3941                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3942                                 break;
3943                         }
3944                         if (ins->opcode == OP_FBLT_UN) {
3945                                 guchar *is_not_zero_check, *end_jump;
3946                                 is_not_zero_check = code;
3947                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3948                                 end_jump = code;
3949                                 x86_jump8 (code, 0);
3950                                 x86_patch (is_not_zero_check, code);
3951                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3952
3953                                 x86_patch (end_jump, code);
3954                         }
3955                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3956                         break;
3957                 case OP_FBGT:
3958                 case OP_FBGT_UN:
3959                         if (cfg->opt & MONO_OPT_FCMOV) {
3960                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3961                                 break;
3962                         }
3963                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3964                         if (ins->opcode == OP_FBGT_UN) {
3965                                 guchar *is_not_zero_check, *end_jump;
3966                                 is_not_zero_check = code;
3967                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3968                                 end_jump = code;
3969                                 x86_jump8 (code, 0);
3970                                 x86_patch (is_not_zero_check, code);
3971                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3972
3973                                 x86_patch (end_jump, code);
3974                         }
3975                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3976                         break;
3977                 case OP_FBGE:
3978                         /* Branch if C013 == 100 or 001 */
3979                         if (cfg->opt & MONO_OPT_FCMOV) {
3980                                 guchar *br1;
3981
3982                                 /* skip branch if C1=1 */
3983                                 br1 = code;
3984                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3985                                 /* branch if (C0 | C3) = 1 */
3986                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3987                                 x86_patch (br1, code);
3988                                 break;
3989                         }
3990                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3991                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3992                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3993                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3994                         break;
3995                 case OP_FBGE_UN:
3996                         /* Branch if C013 == 000 */
3997                         if (cfg->opt & MONO_OPT_FCMOV) {
3998                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3999                                 break;
4000                         }
4001                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4002                         break;
4003                 case OP_FBLE:
4004                         /* Branch if C013=000 or 100 */
4005                         if (cfg->opt & MONO_OPT_FCMOV) {
4006                                 guchar *br1;
4007
4008                                 /* skip branch if C1=1 */
4009                                 br1 = code;
4010                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
4011                                 /* branch if C0=0 */
4012                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
4013                                 x86_patch (br1, code);
4014                                 break;
4015                         }
4016                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
4017                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
4018                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
4019                         break;
4020                 case OP_FBLE_UN:
4021                         /* Branch if C013 != 001 */
4022                         if (cfg->opt & MONO_OPT_FCMOV) {
4023                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4024                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
4025                                 break;
4026                         }
4027                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
4028                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4029                         break;
4030                 case CEE_CKFINITE: {
4031                         x86_push_reg (code, X86_EAX);
4032                         x86_fxam (code);
4033                         x86_fnstsw (code);
4034                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
4035                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
4036                         x86_pop_reg (code, X86_EAX);
4037                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
4038                         break;
4039                 }
4040                 case OP_TLS_GET: {
4041                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
4042                         break;
4043                 }
4044                 case OP_ATOMIC_ADD_I4: {
4045                         int dreg = ins->dreg;
4046
4047                         if (dreg == ins->inst_basereg) {
4048                                 x86_push_reg (code, ins->sreg2);
4049                                 dreg = ins->sreg2;
4050                         } 
4051                         
4052                         if (dreg != ins->sreg2)
4053                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
4054
4055                         x86_prefix (code, X86_LOCK_PREFIX);
4056                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4057
4058                         if (dreg != ins->dreg) {
4059                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4060                                 x86_pop_reg (code, dreg);
4061                         }
4062
4063                         break;
4064                 }
4065                 case OP_ATOMIC_ADD_NEW_I4: {
4066                         int dreg = ins->dreg;
4067
4068                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
4069                         if (ins->sreg2 == dreg) {
4070                                 if (dreg == X86_EBX) {
4071                                         dreg = X86_EDI;
4072                                         if (ins->inst_basereg == X86_EDI)
4073                                                 dreg = X86_ESI;
4074                                 } else {
4075                                         dreg = X86_EBX;
4076                                         if (ins->inst_basereg == X86_EBX)
4077                                                 dreg = X86_EDI;
4078                                 }
4079                         } else if (ins->inst_basereg == dreg) {
4080                                 if (dreg == X86_EBX) {
4081                                         dreg = X86_EDI;
4082                                         if (ins->sreg2 == X86_EDI)
4083                                                 dreg = X86_ESI;
4084                                 } else {
4085                                         dreg = X86_EBX;
4086                                         if (ins->sreg2 == X86_EBX)
4087                                                 dreg = X86_EDI;
4088                                 }
4089                         }
4090
4091                         if (dreg != ins->dreg) {
4092                                 x86_push_reg (code, dreg);
4093                         }
4094
4095                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
4096                         x86_prefix (code, X86_LOCK_PREFIX);
4097                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4098                         /* dreg contains the old value, add with sreg2 value */
4099                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
4100                         
4101                         if (ins->dreg != dreg) {
4102                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4103                                 x86_pop_reg (code, dreg);
4104                         }
4105
4106                         break;
4107                 }
4108                 case OP_ATOMIC_EXCHANGE_I4: {
4109                         guchar *br[2];
4110                         int sreg2 = ins->sreg2;
4111                         int breg = ins->inst_basereg;
4112
4113                         /* cmpxchg uses eax as comperand, need to make sure we can use it
4114                          * hack to overcome limits in x86 reg allocator 
4115                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
4116                          */
4117                         if (ins->dreg != X86_EAX)
4118                                 x86_push_reg (code, X86_EAX);
4119                         
4120                         /* We need the EAX reg for the cmpxchg */
4121                         if (ins->sreg2 == X86_EAX) {
4122                                 x86_push_reg (code, X86_EDX);
4123                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
4124                                 sreg2 = X86_EDX;
4125                         }
4126
4127                         if (breg == X86_EAX) {
4128                                 x86_push_reg (code, X86_ESI);
4129                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
4130                                 breg = X86_ESI;
4131                         }
4132
4133                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
4134
4135                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
4136                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
4137                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
4138                         x86_patch (br [1], br [0]);
4139
4140                         if (breg != ins->inst_basereg)
4141                                 x86_pop_reg (code, X86_ESI);
4142
4143                         if (ins->dreg != X86_EAX) {
4144                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
4145                                 x86_pop_reg (code, X86_EAX);
4146                         }
4147
4148                         if (ins->sreg2 != sreg2)
4149                                 x86_pop_reg (code, X86_EDX);
4150
4151                         break;
4152                 }
4153                 default:
4154                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
4155                         g_assert_not_reached ();
4156                 }
4157
4158                 if ((code - cfg->native_code - offset) > max_len) {
4159                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4160                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4161                         g_assert_not_reached ();
4162                 }
4163                
4164                 cpos += max_len;
4165
4166                 last_ins = ins;
4167                 last_offset = offset;
4168                 
4169                 ins = ins->next;
4170         }
4171
4172         cfg->code_len = code - cfg->native_code;
4173 }
4174
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Architecture hook for registering low-level runtime helper calls.
 * The x86 backend has none to register, so this is intentionally empty.
 */
void
mono_arch_register_lowlevel_calls (void)
{
}
4179
/*
 * mono_arch_patch_code:
 *
 *   Walk the jump-info list JI produced while compiling METHOD and write the
 * resolved target addresses into the emitted native CODE.  When RUN_CCTORS is
 * FALSE we are compiling AOT: only intra-method branch targets (basic blocks
 * and labels) are patched here; everything else is left for load-time
 * resolution.
 */
void
mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
{
	MonoJumpInfo *patch_info;
	gboolean compile_aot = !run_cctors;

	for (patch_info = ji; patch_info; patch_info = patch_info->next) {
		/* ip.i is an offset from the start of the method's native code */
		unsigned char *ip = patch_info->ip.i + code;
		const unsigned char *target;

		target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);

		if (compile_aot) {
			switch (patch_info->type) {
			case MONO_PATCH_INFO_BB:
			case MONO_PATCH_INFO_LABEL:
				break;
			default:
				/* No need to patch these */
				continue;
			}
		}

		switch (patch_info->type) {
		case MONO_PATCH_INFO_IP:
			/* Store the absolute target address at the patch site. */
			*((gconstpointer *)(ip)) = target;
			break;
		case MONO_PATCH_INFO_CLASS_INIT: {
			/* NOTE: this local shadows the CODE parameter on purpose:
			 * x86_call_code () advances its first argument while it
			 * (re-)emits the call opcode at IP. */
			guint8 *code = ip;
			/* Might already been changed to a nop */
			x86_call_code (code, 0);
			x86_patch (ip, target);
			break;
		}
		case MONO_PATCH_INFO_ABS:
		case MONO_PATCH_INFO_METHOD:
		case MONO_PATCH_INFO_METHOD_JUMP:
		case MONO_PATCH_INFO_INTERNAL_METHOD:
		case MONO_PATCH_INFO_BB:
		case MONO_PATCH_INFO_LABEL:
			/* Call/jump displacements: x86_patch rewrites the rel32 operand. */
			x86_patch (ip, target);
			break;
		case MONO_PATCH_INFO_NONE:
			break;
		default: {
			/* Data patches embedded inside an instruction: find the
			 * immediate's offset within the instruction and store the
			 * target there. */
			guint32 offset = mono_arch_get_patch_offset (ip);
			*((gconstpointer *)(ip + offset)) = target;
			break;
		}
		}
	}
}
4232
/*
 * mono_arch_emit_prolog:
 *
 *   Emit the x86 method prologue into a freshly allocated native-code buffer:
 * set up the EBP frame, attach the thread to the JIT for native-to-managed
 * wrappers, save the LMF (Last Managed Frame) or the used callee-saved
 * registers, allocate the stack frame, and load register-allocated arguments
 * from their stack slots.  Returns the code pointer past the prologue.
 */
guint8 *
mono_arch_emit_prolog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoBasicBlock *bb;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int alloc_size, pos, max_offset, i;
	guint8 *code;

	/* Initial size estimate; the buffer is grown later if needed. */
	cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
	code = cfg->native_code = g_malloc (cfg->code_size);

	/* Standard frame: push ebp; mov ebp, esp */
	x86_push_reg (code, X86_EBP);
	x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);

	/* stack_offset is negative (grows down); alloc_size is the frame size */
	alloc_size = - cfg->stack_offset;
	pos = 0;

	if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
		/* Might need to attach the thread to the JIT */
		if (lmf_tls_offset != -1) {
			guint8 *buf;

			/* Fast path: if the LMF TLS slot is non-NULL the thread is
			 * already attached and we can skip the call. */
			code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
			x86_test_reg_reg (code, X86_EAX, X86_EAX);
			buf = code;
			x86_branch8 (code, X86_CC_NE, 0, 0);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			x86_patch (buf, code);
		}
		else {
			/* No TLS fast path available: always call the attach helper. */
			g_assert (!cfg->compile_aot);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
		}
	}

	if (method->save_lmf) {
		/* Build a MonoLMF structure on the stack via a series of pushes;
		 * its layout must stay in sync with the epilogue and CEE_JMP. */
		pos += sizeof (MonoLMF);

		/* save the current IP */
		mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
		x86_push_imm_template (code);

		/* save all caller saved regs */
		x86_push_reg (code, X86_EBP);
		x86_push_reg (code, X86_ESI);
		x86_push_reg (code, X86_EDI);
		x86_push_reg (code, X86_EBX);

		/* save method info */
		x86_push_imm (code, method);

		/* get the address of lmf for the current thread */
		/* 
		 * This is performance critical so we try to use some tricks to make
		 * it fast.
		 */
		if (lmf_tls_offset != -1) {
			/* Load lmf quicky using the GS register */
			code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
		}
		else {
			if (cfg->compile_aot) {
				/* The GOT var does not exist yet */
				/* Get the current IP via call/pop, add the GOT
				 * displacement (patched in later), then make an
				 * indirect call through the GOT slot. */
				x86_call_imm (code, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
				x86_pop_reg (code, X86_EAX);
				x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
				x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
			}
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
		}

		/* push lmf */
		x86_push_reg (code, X86_EAX); 
		/* push *lfm (previous_lmf) */
		x86_push_membase (code, X86_EAX, 0);
		/* *(lmf) = ESP */
		x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
	} else {
		/* No LMF: just save the callee-saved registers this method uses. */

		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_push_reg (code, X86_EBX);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_push_reg (code, X86_EDI);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_push_reg (code, X86_ESI);
			pos += 4;
		}
	}

	/* Space already consumed by the pushes above comes out of the frame. */
	alloc_size -= pos;

	if (alloc_size) {
		/* See mono_emit_stack_alloc */
#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
		/* Touch the stack one page at a time so guard pages are hit
		 * in order (required for Windows stack growth / altstack). */
		guint32 remaining_size = alloc_size;
		while (remaining_size >= 0x1000) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
			x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
			remaining_size -= 0x1000;
		}
		if (remaining_size)
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
#else
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
#endif
	}

	/* compute max_offset in order to use short forward jumps */
	max_offset = 0;
	if (cfg->opt & MONO_OPT_BRANCH) {
		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
			MonoInst *ins = bb->code;
			bb->max_offset = max_offset;

			if (cfg->prof_options & MONO_PROFILE_COVERAGE)
				max_offset += 6;
			/* max alignment for loops */
			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
				max_offset += LOOP_ALIGNMENT;

			while (ins) {
				if (ins->opcode == OP_LABEL)
					ins->inst_c1 = max_offset;
				
				/* Conservative per-opcode length from the machine description. */
				max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
				ins = ins->next;
			}
		}
	}

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);

	/* load arguments allocated to register from the stack */
	sig = mono_method_signature (method);
	pos = 0;

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = cfg->varinfo [pos];
		if (inst->opcode == OP_REGVAR) {
			x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
			if (cfg->verbose_level > 2)
				g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
		}
		pos++;
	}

	cfg->code_len = code - cfg->native_code;

	return code;
}
4409
4410 void
4411 mono_arch_emit_epilog (MonoCompile *cfg)
4412 {
4413         MonoMethod *method = cfg->method;
4414         MonoMethodSignature *sig = mono_method_signature (method);
4415         int quad, pos;
4416         guint32 stack_to_pop;
4417         guint8 *code;
4418         int max_epilog_size = 16;
4419         CallInfo *cinfo;
4420         
4421         if (cfg->method->save_lmf)
4422                 max_epilog_size += 128;
4423         
4424         if (mono_jit_trace_calls != NULL)
4425                 max_epilog_size += 50;
4426
4427         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
4428                 cfg->code_size *= 2;
4429                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4430                 mono_jit_stats.code_reallocs++;
4431         }
4432
4433         code = cfg->native_code + cfg->code_len;
4434
4435         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4436                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4437
4438         /* the code restoring the registers must be kept in sync with CEE_JMP */
4439         pos = 0;
4440         
4441         if (method->save_lmf) {
4442                 gint32 prev_lmf_reg;
4443
4444                 /* Find a spare register */
4445                 switch (sig->ret->type) {
4446                 case MONO_TYPE_I8:
4447                 case MONO_TYPE_U8:
4448                         prev_lmf_reg = X86_EDI;
4449                         cfg->used_int_regs |= (1 << X86_EDI);
4450                         break;
4451                 default:
4452                         prev_lmf_reg = X86_EDX;
4453                         break;
4454                 }
4455
4456                 /* reg = previous_lmf */
4457                 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);
4458
4459                 /* ecx = lmf */
4460                 x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);
4461
4462                 /* *(lmf) = previous_lmf */
4463                 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
4464
4465                 /* restore caller saved regs */
4466                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4467                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
4468                 }
4469
4470                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4471                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
4472                 }
4473                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4474                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
4475                 }
4476
4477                 /* EBP is restored by LEAVE */
4478         } else {
4479                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4480                         pos -= 4;
4481                 }
4482                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4483                         pos -= 4;
4484                 }
4485                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4486                         pos -= 4;
4487                 }
4488
4489                 if (pos)
4490                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
4491
4492                 if (cfg->used_int_regs & (1 << X86_ESI)) {
4493                         x86_pop_reg (code, X86_ESI);
4494                 }
4495                 if (cfg->used_int_regs & (1 << X86_EDI)) {
4496                         x86_pop_reg (code, X86_EDI);
4497                 }
4498                 if (cfg->used_int_regs & (1 << X86_EBX)) {
4499                         x86_pop_reg (code, X86_EBX);
4500                 }
4501         }
4502
4503         /* Load returned vtypes into registers if needed */
4504         cinfo = get_call_info (sig, FALSE);
4505         if (cinfo->ret.storage == ArgValuetypeInReg) {
4506                 for (quad = 0; quad < 2; quad ++) {
4507                         switch (cinfo->ret.pair_storage [quad]) {
4508                         case ArgInIReg:
4509                                 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
4510                                 break;
4511                         case ArgOnFloatFpStack:
4512                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
4513                                 break;
4514                         case ArgOnDoubleFpStack:
4515                                 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
4516                                 break;
4517                         case ArgNone:
4518                                 break;
4519                         default:
4520                                 g_assert_not_reached ();
4521                         }
4522                 }
4523         }
4524
4525         x86_leave (code);
4526
4527         if (CALLCONV_IS_STDCALL (sig)) {
4528                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
4529
4530                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
4531         } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
4532                 stack_to_pop = 4;
4533         else
4534                 stack_to_pop = 0;
4535
4536         if (stack_to_pop)
4537                 x86_ret_imm (code, stack_to_pop);
4538         else
4539                 x86_ret (code);
4540
4541         g_free (cinfo);
4542
4543         cfg->code_len = code - cfg->native_code;
4544
4545         g_assert (cfg->code_len < cfg->code_size);
4546 }
4547
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out-of-line code which throws the exceptions recorded as
 * MONO_PATCH_INFO_EXC patches during code generation. Throw sequences for the
 * same exception class are shared: each throw site pushes its own IL offset
 * and jumps to the shared sequence, which ends in a call to
 * mono_arch_throw_corlib_exception.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	/* Cache of up to 16 already-emitted throw sequences, for reuse */
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		/* AOT sequences are longer because of the GOT address computation */
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	/* Grow the native code buffer until the throw sequences fit */
	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Redirect the branch in the method body to the throw code emitted here */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse it: push this site's offset and jump to the shared sequence */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 got_reg = X86_EAX;
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				if (cfg->compile_aot) {
					size = 5 + 6;
					if (!cfg->got_var)
						size += 32;
					else if (cfg->got_var->opcode == OP_REGOFFSET)
						size += 6;
				}
				else
					size = 5 + 5;

				/*
				 * The offset pushed below gets backpatched once its final value is
				 * known; pick the imm8 or imm32 push encoding based on how large
				 * the offset can possibly become.
				 */
				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				if (cfg->compile_aot) {
					/*
					 * Since the patches are generated by the back end, there is
					 * no way to generate a got_var at this point.
					 */
					if (!cfg->got_var) {
						/* Compute the GOT address in EAX with a call/pop/add sequence */
						x86_call_imm (code, 0);
						mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
						x86_pop_reg (code, X86_EAX);
						x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
					}
					else {
						if (cfg->got_var->opcode == OP_REGOFFSET)
							x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
						else
							got_reg = cfg->got_var->dreg;
					}
				}

				x86_push_imm (code, exc_class->type_token);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				if (cfg->compile_aot)
					x86_call_membase (code, got_reg, 0xf0f0f0f0);
				else
					x86_call_code (code, 0);
				/* Backpatch the push emitted above with the now-known offset */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				/* Pad with nops if the backpatched push was the short form */
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
4683
/*
 * mono_arch_flush_icache:
 *
 *   Flush the instruction cache for the code range [code, code + size).
 * A no-op on x86, where the instruction cache is kept coherent with data
 * writes by the hardware.
 */
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* not needed */
}
4689
/*
 * mono_arch_flush_register_windows:
 *
 *   No-op on x86: register windows are a SPARC-style feature which does not
 * exist on this architecture.
 */
void
mono_arch_flush_register_windows (void)
{
}
4694
4695 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4696
4697 static void
4698 setup_stack (MonoJitTlsData *tls)
4699 {
4700         pthread_t self = pthread_self();
4701         pthread_attr_t attr;
4702         size_t stsize = 0;
4703         struct sigaltstack sa;
4704         guint8 *staddr = NULL;
4705         guint8 *current = (guint8*)&staddr;
4706
4707         if (mono_running_on_valgrind ())
4708                 return;
4709
4710         /* Determine stack boundaries */
4711         pthread_attr_init( &attr );
4712 #ifdef HAVE_PTHREAD_GETATTR_NP
4713         pthread_getattr_np( self, &attr );
4714 #else
4715 #ifdef HAVE_PTHREAD_ATTR_GET_NP
4716         pthread_attr_get_np( self, &attr );
4717 #elif defined(sun)
4718         pthread_attr_getstacksize( &attr, &stsize );
4719 #else
4720 #error "Not implemented"
4721 #endif
4722 #endif
4723 #ifndef sun
4724         pthread_attr_getstack( &attr, (void**)&staddr, &stsize );
4725 #endif
4726
4727         g_assert (staddr);
4728
4729         g_assert ((current > staddr) && (current < staddr + stsize));
4730
4731         tls->end_of_stack = staddr + stsize;
4732
4733         /*
4734          * threads created by nptl does not seem to have a guard page, and
4735          * since the main thread is not created by us, we can't even set one.
4736          * Increasing stsize fools the SIGSEGV signal handler into thinking this
4737          * is a stack overflow exception.
4738          */
4739         tls->stack_size = stsize + getpagesize ();
4740
4741         /* Setup an alternate signal stack */
4742         tls->signal_stack = mmap (0, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
4743         tls->signal_stack_size = SIGNAL_STACK_SIZE;
4744
4745         g_assert (tls->signal_stack);
4746
4747         sa.ss_sp = tls->signal_stack;
4748         sa.ss_size = SIGNAL_STACK_SIZE;
4749         sa.ss_flags = SS_ONSTACK;
4750         sigaltstack (&sa, NULL);
4751 }
4752
4753 #endif
4754
4755 /*
4756  * Support for fast access to the thread-local lmf structure using the GS
4757  * segment register on NPTL + kernel 2.6.x.
4758  */
4759
/* TRUE once the TLS offsets below have been looked up */
static gboolean tls_offset_inited = FALSE;
4761
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Look up the TLS keys/offsets used for inline TLS access (unless disabled
 * via the MONO_NO_TLS environment variable), and set up the alternate signal
 * stack where supported.
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
			/* 
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 */
			/* Note: tls_offset_inited is deliberately NOT set on this path */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();
			thread_tls_offset = mono_thread_get_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
			if (thread_tls_offset >= 64)
				thread_tls_offset = -1;
#else
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			thread_tls_offset = mono_thread_get_tls_offset ();
#endif
		}
	}

#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
	setup_stack (tls);
#endif
}
4796
4797 void
4798 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4799 {
4800 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4801         struct sigaltstack sa;
4802
4803         sa.ss_sp = tls->signal_stack;
4804         sa.ss_size = SIGNAL_STACK_SIZE;
4805         sa.ss_flags = SS_DISABLE;
4806         sigaltstack  (&sa, NULL);
4807
4808         if (tls->signal_stack)
4809                 munmap (tls->signal_stack, SIGNAL_STACK_SIZE);
4810 #endif
4811 }
4812
4813 void
4814 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4815 {
4816
4817         /* add the this argument */
4818         if (this_reg != -1) {
4819                 MonoInst *this;
4820                 MONO_INST_NEW (cfg, this, OP_OUTARG);
4821                 this->type = this_type;
4822                 this->sreg1 = this_reg;
4823                 mono_bblock_add_inst (cfg->cbb, this);
4824         }
4825
4826         if (vt_reg != -1) {
4827                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
4828                 MonoInst *vtarg;
4829
4830                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4831                         /*
4832                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4833                          * the stack. Save the address here, so the call instruction can
4834                          * access it.
4835                          */
4836                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4837                         vtarg->inst_destbasereg = X86_ESP;
4838                         vtarg->inst_offset = inst->stack_usage;
4839                         vtarg->sreg1 = vt_reg;
4840                         mono_bblock_add_inst (cfg->cbb, vtarg);
4841                 }
4842                 else {
4843                         MonoInst *vtarg;
4844                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4845                         vtarg->type = STACK_MP;
4846                         vtarg->sreg1 = vt_reg;
4847                         mono_bblock_add_inst (cfg->cbb, vtarg);
4848                 }
4849
4850                 g_free (cinfo);
4851         }
4852 }
4853
4854
4855 MonoInst*
4856 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4857 {
4858         MonoInst *ins = NULL;
4859
4860         if (cmethod->klass == mono_defaults.math_class) {
4861                 if (strcmp (cmethod->name, "Sin") == 0) {
4862                         MONO_INST_NEW (cfg, ins, OP_SIN);
4863                         ins->inst_i0 = args [0];
4864                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4865                         MONO_INST_NEW (cfg, ins, OP_COS);
4866                         ins->inst_i0 = args [0];
4867                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4868                         MONO_INST_NEW (cfg, ins, OP_TAN);
4869                         ins->inst_i0 = args [0];
4870                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4871                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4872                         ins->inst_i0 = args [0];
4873                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4874                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4875                         ins->inst_i0 = args [0];
4876                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4877                         MONO_INST_NEW (cfg, ins, OP_ABS);
4878                         ins->inst_i0 = args [0];
4879                 }
4880 #if 0
4881                 /* OP_FREM is not IEEE compatible */
4882                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4883                         MONO_INST_NEW (cfg, ins, OP_FREM);
4884                         ins->inst_i0 = args [0];
4885                         ins->inst_i1 = args [1];
4886                 }
4887 #endif
4888         } else if(cmethod->klass->image == mono_defaults.corlib &&
4889                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4890                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4891
4892                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4893                         MonoInst *ins_iconst;
4894
4895                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4896                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4897                         ins_iconst->inst_c0 = 1;
4898
4899                         ins->inst_i0 = args [0];
4900                         ins->inst_i1 = ins_iconst;
4901                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4902                         MonoInst *ins_iconst;
4903
4904                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4905                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4906                         ins_iconst->inst_c0 = -1;
4907
4908                         ins->inst_i0 = args [0];
4909                         ins->inst_i1 = ins_iconst;
4910                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4911                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4912
4913                         ins->inst_i0 = args [0];
4914                         ins->inst_i1 = args [1];
4915                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4916                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
4917
4918                         ins->inst_i0 = args [0];
4919                         ins->inst_i1 = args [1];
4920                 }
4921         }
4922
4923         return ins;
4924 }
4925
4926
/*
 * mono_arch_print_tree:
 *
 *   Always returns 0: no x86-specific opcodes require custom tree printing,
 * so the generic printer handles everything.
 */
gboolean
mono_arch_print_tree (MonoInst *tree, int arity)
{
	return 0;
}
4932
4933 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4934 {
4935         MonoInst* ins;
4936         
4937         if (appdomain_tls_offset == -1)
4938                 return NULL;
4939
4940         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4941         ins->inst_offset = appdomain_tls_offset;
4942         return ins;
4943 }
4944
4945 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4946 {
4947         MonoInst* ins;
4948
4949         if (thread_tls_offset == -1)
4950                 return NULL;
4951
4952         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4953         ins->inst_offset = thread_tls_offset;
4954         return ins;
4955 }
4956
/*
 * mono_arch_get_patch_offset:
 *
 *   Return the offset (in bytes) inside the instruction starting at CODE of
 * the immediate/displacement field which needs to be patched. The patterns
 * below cover only the instruction forms this backend emits at patch sites.
 */
guint32
mono_arch_get_patch_offset (guint8 *code)
{
	if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
		/* mov <REG>, <OFFSET>(<REG>): disp32 follows opcode + modrm */
		return 2;
	else if ((code [0] == 0xba))
		/* mov $<IMM32>, %edx */
		return 1;
	else if ((code [0] == 0x68))
		/* push IMM */
		return 1;
	else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
		/* push <OFFSET>(<REG>) */
		return 2;
	else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
		/* call *<OFFSET>(<REG>) */
		return 2;
	else if ((code [0] == 0xdd) || (code [0] == 0xd9))
		/* fldl <ADDR> -- assumes the disp32 addressing form emitted by this backend */
		return 2;
	else if ((code [0] == 0x58) && (code [1] == 0x05))
		/* pop %eax; add <OFFSET>, %eax */
		return 2;
	else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
		/* pop <REG>; add <OFFSET>, <REG> (0x81 form carries an extra modrm byte) */
		return 3;
	else {
		/* An unrecognized instruction at a patch site indicates a JIT bug */
		g_assert_not_reached ();
		return -1;
	}
}
4987
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   Given CODE, the return address of an indirect call, disassemble backwards
 * to identify the register + displacement the call target was loaded from,
 * and compute the address of that slot using the saved register state in
 * REGS. Returns NULL for direct calls or unrecognized sequences.
 */
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;
	/* call *imm8(%reg): mod == 1; the 0x18 mask tests part of the opcode-extension field */
	if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *imm32(%reg): mod == 2 */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* Direct call: there is no vcall slot */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call: should check the above code can't catch it earlier 
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	/* Slot address = saved value of the base register + displacement */
	return (gpointer*)(((gint32)(regs [reg])) + disp);
}
5026
/*
 * mono_arch_get_delegate_method_ptr_addr:
 *
 *   Given CODE, the return address of a delegate invocation, disassemble
 * backwards looking for the 7 byte sequence
 *   8b /r        mov  %<reg>, %eax      (mod == 3, reg field == EAX)
 *   8b 40 disp8  mov  disp8(%eax), %eax (load the method pointer)
 *   ff d0        call *%eax
 * If it matches, return the address of the method pointer slot computed from
 * the saved register state in REGS; otherwise return NULL.
 */
gpointer* 
mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	code -= 7;
	if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
		reg = x86_modrm_rm (code [1]);
		disp = code [4];

		/* If the base was EAX itself it has been clobbered by the load, so no stable slot */
		if (reg == X86_EAX)
			return NULL;
		else
			return (gpointer*)(((gint32)(regs [reg])) + disp);
	}

	return NULL;
}