2005-04-04 Zoltan Varga <vargaz@freemail.hu>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #ifndef PLATFORM_WIN32
16 #include <unistd.h>
17 #include <sys/mman.h>
18 #endif
19
20 #include <mono/metadata/appdomain.h>
21 #include <mono/metadata/debug-helpers.h>
22 #include <mono/metadata/threads.h>
23 #include <mono/metadata/profiler-private.h>
24 #include <mono/utils/mono-math.h>
25
26 #include "trace.h"
27 #include "mini-x86.h"
28 #include "inssel.h"
29 #include "cpu-pentium.h"
30
31 /* On windows, these hold the key returned by TlsAlloc () */
32 static gint lmf_tls_offset = -1;
33 static gint appdomain_tls_offset = -1;
34 static gint thread_tls_offset = -1;
35
36 #define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
37
38 #ifdef PLATFORM_WIN32
39 /* Under windows, the default pinvoke calling convention is stdcall */
40 #define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
41 #else
42 #define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
43 #endif
44
45 #define SIGNAL_STACK_SIZE (64 * 1024)
46
47 #define NOT_IMPLEMENTED g_assert_not_reached ()
48
49 const char*
50 mono_arch_regname (int reg) {
51         switch (reg) {
52         case X86_EAX: return "%eax";
53         case X86_EBX: return "%ebx";
54         case X86_ECX: return "%ecx";
55         case X86_EDX: return "%edx";
56         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
57         case X86_EDI: return "%edi";
58         case X86_ESI: return "%esi";
59         }
60         return "unknown";
61 }
62
/* Where a single argument or return value is passed. */
typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFloatFpStack,   /* on the fp stack, single precision */
	ArgOnDoubleFpStack,  /* on the fp stack, double precision */
	ArgNone
} ArgStorage;

/* Passing information for one argument or for the return value. */
typedef struct {
	gint16 offset;       /* stack offset, used when storage == ArgOnStack */
	gint8  reg;          /* register number, used for the register storages */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

/* Passing information for a whole signature. */
typedef struct {
	int nargs;
	guint32 stack_usage;       /* total stack space taken by the arguments */
	guint32 reg_usage;         /* number of integer registers used */
	guint32 freg_usage;        /* number of float registers used */
	gboolean need_stack_align;
	ArgInfo ret;               /* the return value */
	ArgInfo sig_cookie;        /* the vararg signature cookie */
	ArgInfo args [1];          /* variable length, one entry per argument */
} CallInfo;
94
95 #define PARAM_REGS 0
96
97 #define FLOAT_PARAM_REGS 0
98
99 static X86_Reg_No param_regs [] = { 0 };
100
101 #ifdef PLATFORM_WIN32
102 static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
103 #endif
104
105 static void inline
106 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
107 {
108     ainfo->offset = *stack_size;
109
110     if (*gr >= PARAM_REGS) {
111                 ainfo->storage = ArgOnStack;
112                 (*stack_size) += sizeof (gpointer);
113     }
114     else {
115                 ainfo->storage = ArgInIReg;
116                 ainfo->reg = param_regs [*gr];
117                 (*gr) ++;
118     }
119 }
120
121 static void inline
122 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
123 {
124         ainfo->offset = *stack_size;
125
126         g_assert (PARAM_REGS == 0);
127         
128         ainfo->storage = ArgOnStack;
129         (*stack_size) += sizeof (gpointer) * 2;
130 }
131
132 static void inline
133 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
134 {
135     ainfo->offset = *stack_size;
136
137     if (*gr >= FLOAT_PARAM_REGS) {
138                 ainfo->storage = ArgOnStack;
139                 (*stack_size) += sizeof (gpointer);
140     }
141     else {
142                 /* A double register */
143                 if (is_double)
144                         ainfo->storage = ArgInDoubleSSEReg;
145                 else
146                         ainfo->storage = ArgInFloatSSEReg;
147                 ainfo->reg = *gr;
148                 (*gr) += 1;
149     }
150 }
151
152
/*
 * add_valuetype:
 *
 *   Compute the passing info for a valuetype argument or return value.
 * TYPE is the valuetype, the result is stored in AINFO.  IS_RETURN
 * distinguishes return values from arguments; GR, FR and STACK_SIZE
 * track register and stack usage.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* pinvoke signatures use the native (marshalled) size */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* Structs of size 1, 2, 4 or 8 come back in return_regs (EAX, then EDX) */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* everything else is passed by value on the stack, pointer aligned */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
208
/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386 
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 */
218 static CallInfo*
219 get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
220 {
221         guint32 i, gr, fr;
222         MonoType *ret_type;
223         int n = sig->hasthis + sig->param_count;
224         guint32 stack_size = 0;
225         CallInfo *cinfo;
226
227         cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
228
229         gr = 0;
230         fr = 0;
231
232         /* return value */
233         {
234                 ret_type = mono_type_get_underlying_type (sig->ret);
235                 switch (ret_type->type) {
236                 case MONO_TYPE_BOOLEAN:
237                 case MONO_TYPE_I1:
238                 case MONO_TYPE_U1:
239                 case MONO_TYPE_I2:
240                 case MONO_TYPE_U2:
241                 case MONO_TYPE_CHAR:
242                 case MONO_TYPE_I4:
243                 case MONO_TYPE_U4:
244                 case MONO_TYPE_I:
245                 case MONO_TYPE_U:
246                 case MONO_TYPE_PTR:
247                 case MONO_TYPE_FNPTR:
248                 case MONO_TYPE_CLASS:
249                 case MONO_TYPE_OBJECT:
250                 case MONO_TYPE_SZARRAY:
251                 case MONO_TYPE_ARRAY:
252                 case MONO_TYPE_STRING:
253                         cinfo->ret.storage = ArgInIReg;
254                         cinfo->ret.reg = X86_EAX;
255                         break;
256                 case MONO_TYPE_U8:
257                 case MONO_TYPE_I8:
258                         cinfo->ret.storage = ArgInIReg;
259                         cinfo->ret.reg = X86_EAX;
260                         break;
261                 case MONO_TYPE_R4:
262                         cinfo->ret.storage = ArgOnFloatFpStack;
263                         break;
264                 case MONO_TYPE_R8:
265                         cinfo->ret.storage = ArgOnDoubleFpStack;
266                         break;
267                 case MONO_TYPE_VALUETYPE: {
268                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
269
270                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
271                         if (cinfo->ret.storage == ArgOnStack)
272                                 /* The caller passes the address where the value is stored */
273                                 add_general (&gr, &stack_size, &cinfo->ret);
274                         break;
275                 }
276                 case MONO_TYPE_TYPEDBYREF:
277                         /* Same as a valuetype with size 24 */
278                         add_general (&gr, &stack_size, &cinfo->ret);
279                         ;
280                         break;
281                 case MONO_TYPE_VOID:
282                         cinfo->ret.storage = ArgNone;
283                         break;
284                 default:
285                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
286                 }
287         }
288
289         /* this */
290         if (sig->hasthis)
291                 add_general (&gr, &stack_size, cinfo->args + 0);
292
293         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
294                 gr = PARAM_REGS;
295                 fr = FLOAT_PARAM_REGS;
296                 
297                 /* Emit the signature cookie just before the implicit arguments */
298                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
299         }
300
301         for (i = 0; i < sig->param_count; ++i) {
302                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
303                 MonoType *ptype;
304
305                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
306                         /* We allways pass the sig cookie on the stack for simplicity */
307                         /* 
308                          * Prevent implicit arguments + the sig cookie from being passed 
309                          * in registers.
310                          */
311                         gr = PARAM_REGS;
312                         fr = FLOAT_PARAM_REGS;
313
314                         /* Emit the signature cookie just before the implicit arguments */
315                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
316                 }
317
318                 if (sig->params [i]->byref) {
319                         add_general (&gr, &stack_size, ainfo);
320                         continue;
321                 }
322                 ptype = mono_type_get_underlying_type (sig->params [i]);
323                 switch (ptype->type) {
324                 case MONO_TYPE_BOOLEAN:
325                 case MONO_TYPE_I1:
326                 case MONO_TYPE_U1:
327                         add_general (&gr, &stack_size, ainfo);
328                         break;
329                 case MONO_TYPE_I2:
330                 case MONO_TYPE_U2:
331                 case MONO_TYPE_CHAR:
332                         add_general (&gr, &stack_size, ainfo);
333                         break;
334                 case MONO_TYPE_I4:
335                 case MONO_TYPE_U4:
336                         add_general (&gr, &stack_size, ainfo);
337                         break;
338                 case MONO_TYPE_I:
339                 case MONO_TYPE_U:
340                 case MONO_TYPE_PTR:
341                 case MONO_TYPE_FNPTR:
342                 case MONO_TYPE_CLASS:
343                 case MONO_TYPE_OBJECT:
344                 case MONO_TYPE_STRING:
345                 case MONO_TYPE_SZARRAY:
346                 case MONO_TYPE_ARRAY:
347                         add_general (&gr, &stack_size, ainfo);
348                         break;
349                 case MONO_TYPE_VALUETYPE:
350                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
351                         break;
352                 case MONO_TYPE_TYPEDBYREF:
353                         stack_size += sizeof (MonoTypedRef);
354                         ainfo->storage = ArgOnStack;
355                         break;
356                 case MONO_TYPE_U8:
357                 case MONO_TYPE_I8:
358                         add_general_pair (&gr, &stack_size, ainfo);
359                         break;
360                 case MONO_TYPE_R4:
361                         add_float (&fr, &stack_size, ainfo, FALSE);
362                         break;
363                 case MONO_TYPE_R8:
364                         add_float (&fr, &stack_size, ainfo, TRUE);
365                         break;
366                 default:
367                         g_error ("unexpected type 0x%x", ptype->type);
368                         g_assert_not_reached ();
369                 }
370         }
371
372         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
373                 gr = PARAM_REGS;
374                 fr = FLOAT_PARAM_REGS;
375                 
376                 /* Emit the signature cookie just before the implicit arguments */
377                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
378         }
379
380         cinfo->stack_usage = stack_size;
381         cinfo->reg_usage = gr;
382         cinfo->freg_usage = fr;
383         return cinfo;
384 }
385
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries. 
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, align, pad;
	int offset = 8;  /* arguments start at EBP + 8 -- presumably above the saved EBP and return address; TODO confirm */
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	/* a valuetype returned on the stack adds a hidden address argument */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* entry 0 holds the combined size of the implicit arguments */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else
			size = mono_type_stack_size (csig->params [k], &align);

		/* ignore alignment for now */
		align = 1;

		/* pad up to the parameter's alignment, recording the padding */
		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);	
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* pad the whole frame up to MONO_ARCH_FRAME_ALIGNMENT */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
449
/*
 * Machine code of a small helper with the CpuidFunc signature which
 * executes the CPUID instruction and stores EAX/EBX/ECX/EDX through the
 * pointer arguments.  It is copied into executable memory at runtime
 * instead of being written as inline asm, to work around gcc PIC issues
 * (EBX is the PIC register) and WinXP DEP.
 */
static const guchar cpuid_impl [] = {
	0x55,                		/* push   %ebp */
	0x89, 0xe5,                	/* mov    %esp,%ebp */
	0x53,                		/* push   %ebx */
	0x8b, 0x45, 0x08,             	/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                	/* cpuid   */
	0x50,                		/* push   %eax */
	0x8b, 0x45, 0x10,             	/* mov    0x10(%ebp),%eax */
	0x89, 0x18,                	/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,             	/* mov    0x14(%ebp),%eax */
	0x89, 0x08,                	/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,             	/* mov    0x18(%ebp),%eax */
	0x89, 0x10,                	/* mov    %edx,(%eax) */
	0x58,                		/* pop    %eax */
	0x8b, 0x55, 0x0c,             	/* mov    0xc(%ebp),%edx */
	0x89, 0x02,                	/* mov    %eax,(%edx) */
	0x5b,                		/* pop    %ebx */
	0xc9,                		/* leave   */
	0xc3,                		/* ret     */
};
470
471 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
472
/*
 * cpuid:
 *
 *   Execute the CPUID instruction with function number ID, storing the
 * resulting register values through the four pointer arguments.
 * Returns 1 on success, 0 if the processor does not support CPUID.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	/* CPUID is available iff bit 21 (0x200000, the ID flag) of EFLAGS
	 * can be toggled */
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	/* the same EFLAGS ID-bit check in MSVC inline assembly syntax */
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		/* copy the cpuid stub into executable memory and call it */
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
532
533 /*
534  * Initialize the cpu to execute managed code.
535  */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	/* read the x87 control word, select double precision, write it back */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	/* MSVC has no inline asm for this; use the CRT helper instead */
	_control87 (_PC_64, MCW_PC);
#endif
}
552
553 /*
554  * This function returns the optimizations supported on this cpu.
555  */
556 guint32
557 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
558 {
559         int eax, ebx, ecx, edx;
560         guint32 opts = 0;
561         
562         *exclude_mask = 0;
563         /* Feature Flags function, flags returned in EDX. */
564         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
565                 if (edx & (1 << 15)) {
566                         opts |= MONO_OPT_CMOV;
567                         if (edx & 1)
568                                 opts |= MONO_OPT_FCMOV;
569                         else
570                                 *exclude_mask |= MONO_OPT_FCMOV;
571                 } else
572                         *exclude_mask |= MONO_OPT_CMOV;
573         }
574         return opts;
575 }
576
/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* 0xf7 with modrm reg field 7 is "idiv r/m32"; mod == 3 means the
	 * divisor is a register */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		default:
			/* only ECX and EBX divisors are expected here; other
			 * registers are not handled -- TODO confirm the JIT never
			 * emits them as idiv operands */
			g_assert_not_reached ();
			reg = -1;
		}

		/* a divisor of -1 means the fault was an overflow
		 * (INT_MIN / -1), not a division by zero */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
613
614 static gboolean
615 is_regsize_var (MonoType *t) {
616         if (t->byref)
617                 return TRUE;
618         switch (mono_type_get_underlying_type (t)->type) {
619         case MONO_TYPE_I4:
620         case MONO_TYPE_U4:
621         case MONO_TYPE_I:
622         case MONO_TYPE_U:
623         case MONO_TYPE_PTR:
624         case MONO_TYPE_FNPTR:
625                 return TRUE;
626         case MONO_TYPE_OBJECT:
627         case MONO_TYPE_STRING:
628         case MONO_TYPE_CLASS:
629         case MONO_TYPE_SZARRAY:
630         case MONO_TYPE_ARRAY:
631                 return TRUE;
632         case MONO_TYPE_VALUETYPE:
633                 return FALSE;
634         }
635         return FALSE;
636 }
637
638 GList *
639 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
640 {
641         GList *vars = NULL;
642         int i;
643
644         for (i = 0; i < cfg->num_varinfo; i++) {
645                 MonoInst *ins = cfg->varinfo [i];
646                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
647
648                 /* unused vars */
649                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
650                         continue;
651
652                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
653                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
654                         continue;
655
656                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
657                  * 8bit quantities in caller saved registers on x86 */
658                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
659                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
660                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
661                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
662                         g_assert (i == vmv->idx);
663                         vars = g_list_prepend (vars, vmv);
664                 }
665         }
666
667         vars = mono_varlist_sort (cfg, vars, 0);
668
669         return vars;
670 }
671
672 GList *
673 mono_arch_get_global_int_regs (MonoCompile *cfg)
674 {
675         GList *regs = NULL;
676
677         /* we can use 3 registers for global allocation */
678         regs = g_list_prepend (regs, (gpointer)X86_EBX);
679         regs = g_list_prepend (regs, (gpointer)X86_ESI);
680         regs = g_list_prepend (regs, (gpointer)X86_EDI);
681
682         return regs;
683 }
684
685 /*
686  * mono_arch_regalloc_cost:
687  *
688  *  Return the cost, in number of memory references, of the action of 
689  * allocating the variable VMV into a register during global register
690  * allocation.
691  */
692 guint32
693 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
694 {
695         MonoInst *ins = cfg->varinfo [vmv->idx];
696
697         if (cfg->method->save_lmf)
698                 /* The register is already saved */
699                 return (ins->opcode == OP_ARG) ? 1 : 0;
700         else
701                 /* push+pop+possible load if it is an argument */
702                 return (ins->opcode == OP_ARG) ? 3 : 2;
703 }
704  
705 /*
706  * Set var information according to the calling convention. X86 version.
707  * The locals var stuff should most likely be split in another method.
708  */
void
mono_arch_allocate_vars (MonoCompile *m)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset, curinst, size, align;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (m->method);
	sig = mono_method_signature (m->method);

	/* incoming arguments are addressed above EBP, starting at EBP + 8 --
	 * presumably above the saved EBP and return address; TODO confirm */
	offset = 8;
	curinst = 0;

	cinfo = get_call_info (sig, FALSE);

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* the valuetype return address is passed as a hidden stack argument */
		m->ret->opcode = OP_REGOFFSET;
		m->ret->inst_basereg = X86_EBP;
		m->ret->inst_offset = offset;
		offset += sizeof (gpointer);
		break;
	case ArgValuetypeInReg:
		/* handled below, once the size of the register save area is known */
		break;
	case ArgInIReg:
		m->ret->opcode = OP_REGVAR;
		m->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->hasthis) {
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		offset += sizeof (gpointer);
		curinst++;
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		/* the vararg signature cookie takes one slot after 'this' */
		m->sig_cookie = offset;
		offset += sizeof (gpointer);
	}

	/* assign frame offsets to the incoming arguments */
	for (i = 0; i < sig->param_count; ++i) {
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		size = mono_type_size (sig->params [i], &align);
		/* round the size up to a multiple of 4 */
		size += 4 - 1;
		size &= ~(4 - 1);
		offset += size;
		curinst++;
	}

	/* from here on, offset counts downwards from EBP into the locals area */
	offset = 0;

	/* reserve space to save LMF and caller saved registers */

	if (m->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (m->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		m->ret->opcode = OP_REGOFFSET;
		m->ret->inst_basereg = X86_EBP;
		m->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (m, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* align the start of the locals area */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = m->locals_start; i < m->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = m->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;

	/* round the frame size up to MONO_ARCH_FRAME_ALIGNMENT */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	g_free (cinfo);

	/* change sign? */
	m->stack_offset = -offset;
}
837
838 void
839 mono_arch_create_vars (MonoCompile *cfg)
840 {
841         MonoMethodSignature *sig;
842         CallInfo *cinfo;
843
844         sig = mono_method_signature (cfg->method);
845
846         cinfo = get_call_info (sig, FALSE);
847
848         if (cinfo->ret.storage == ArgValuetypeInReg)
849                 cfg->ret_var_is_local = TRUE;
850
851         g_free (cinfo);
852 }
853
854 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
855  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
856  */
857
858 /* 
859  * take the arguments and generate the arch-specific
860  * instructions to properly call the function in call.
861  * This includes pushing, moving arguments to the right register
862  * etc.
863  * Issue: who does the spilling if needed, and when?
864  */
865 MonoCallInst*
866 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
867         MonoInst *arg, *in;
868         MonoMethodSignature *sig;
869         int i, n, stack_size, type;
870         MonoType *ptype;
871         CallInfo *cinfo;
872
873         stack_size = 0;
874         /* add the vararg cookie before the non-implicit args */
875         if (call->signature->call_convention == MONO_CALL_VARARG) {
876                 MonoInst *sig_arg;
877                 /* FIXME: Add support for signature tokens to AOT */
878                 cfg->disable_aot = TRUE;
879                 MONO_INST_NEW (cfg, arg, OP_OUTARG);
880                 MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
881                 sig_arg->inst_p0 = call->signature;
882                 arg->inst_left = sig_arg;
883                 arg->type = STACK_PTR;
884                 /* prepend, so they get reversed */
885                 arg->next = call->out_args;
886                 call->out_args = arg;
887                 stack_size += sizeof (gpointer);
888         }
889         sig = call->signature;
890         n = sig->param_count + sig->hasthis;
891
892         cinfo = get_call_info (sig, FALSE);
893
894         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
895                 if (cinfo->ret.storage == ArgOnStack)
896                         stack_size += sizeof (gpointer);
897         }
898
899         for (i = 0; i < n; ++i) {
900                 if (is_virtual && i == 0) {
901                         /* the argument will be attached to the call instrucion */
902                         in = call->args [i];
903                         stack_size += 4;
904                 } else {
905                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
906                         in = call->args [i];
907                         arg->cil_code = in->cil_code;
908                         arg->inst_left = in;
909                         arg->type = in->type;
910                         /* prepend, so they get reversed */
911                         arg->next = call->out_args;
912                         call->out_args = arg;
913                         if (i >= sig->hasthis) {
914                                 MonoType *t = sig->params [i - sig->hasthis];
915                                 ptype = mono_type_get_underlying_type (t);
916                                 if (t->byref)
917                                         type = MONO_TYPE_U;
918                                 else
919                                         type = ptype->type;
920                                 /* FIXME: validate arguments... */
921                                 switch (type) {
922                                 case MONO_TYPE_I:
923                                 case MONO_TYPE_U:
924                                 case MONO_TYPE_BOOLEAN:
925                                 case MONO_TYPE_CHAR:
926                                 case MONO_TYPE_I1:
927                                 case MONO_TYPE_U1:
928                                 case MONO_TYPE_I2:
929                                 case MONO_TYPE_U2:
930                                 case MONO_TYPE_I4:
931                                 case MONO_TYPE_U4:
932                                 case MONO_TYPE_STRING:
933                                 case MONO_TYPE_CLASS:
934                                 case MONO_TYPE_OBJECT:
935                                 case MONO_TYPE_PTR:
936                                 case MONO_TYPE_FNPTR:
937                                 case MONO_TYPE_ARRAY:
938                                 case MONO_TYPE_SZARRAY:
939                                         stack_size += 4;
940                                         break;
941                                 case MONO_TYPE_I8:
942                                 case MONO_TYPE_U8:
943                                         stack_size += 8;
944                                         break;
945                                 case MONO_TYPE_R4:
946                                         stack_size += 4;
947                                         arg->opcode = OP_OUTARG_R4;
948                                         break;
949                                 case MONO_TYPE_R8:
950                                         stack_size += 8;
951                                         arg->opcode = OP_OUTARG_R8;
952                                         break;
953                                 case MONO_TYPE_VALUETYPE: {
954                                         int size;
955                                         if (sig->pinvoke) 
956                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
957                                         else 
958                                                 size = mono_type_stack_size (&in->klass->byval_arg, NULL);
959
960                                         stack_size += size;
961                                         arg->opcode = OP_OUTARG_VT;
962                                         arg->klass = in->klass;
963                                         arg->unused = sig->pinvoke;
964                                         arg->inst_imm = size; 
965                                         break;
966                                 }
967                                 case MONO_TYPE_TYPEDBYREF:
968                                         stack_size += sizeof (MonoTypedRef);
969                                         arg->opcode = OP_OUTARG_VT;
970                                         arg->klass = in->klass;
971                                         arg->unused = sig->pinvoke;
972                                         arg->inst_imm = sizeof (MonoTypedRef); 
973                                         break;
974                                 default:
975                                         g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
976                                 }
977                         } else {
978                                 /* the this argument */
979                                 stack_size += 4;
980                         }
981                 }
982         }
983
984         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
985                 if (cinfo->ret.storage == ArgValuetypeInReg) {
986                         MonoInst *zero_inst;
987                         /*
988                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
989                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
990                          * before calling the function. So we add a dummy instruction to represent pushing the 
991                          * struct return address to the stack. The return address will be saved to this stack slot 
992                          * by the code emitted in this_vret_args.
993                          */
994                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
995                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
996                         zero_inst->inst_p0 = 0;
997                         arg->inst_left = zero_inst;
998                         arg->type = STACK_PTR;
999                         /* prepend, so they get reversed */
1000                         arg->next = call->out_args;
1001                         call->out_args = arg;
1002                 }
1003                 else
1004                         /* if the function returns a struct, the called method already does a ret $0x4 */
1005                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
1006                                 stack_size -= 4;
1007         }
1008
1009         call->stack_usage = stack_size;
1010         g_free (cinfo);
1011
1012         /* 
1013          * should set more info in call, such as the stack space
1014          * used by the args that needs to be added back to esp
1015          */
1016
1017         return call;
1018 }
1019
1020 /*
1021  * Allow tracing to work with this interface (with an optional argument)
1022  */
1023 void*
1024 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1025 {
1026         guchar *code = p;
1027
1028         /* if some args are passed in registers, we need to save them here */
1029         x86_push_reg (code, X86_EBP);
1030
1031         if (cfg->compile_aot) {
1032                 x86_push_imm (code, cfg->method);
1033                 x86_mov_reg_imm (code, X86_EAX, func);
1034                 x86_call_reg (code, X86_EAX);
1035         } else {
1036                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1037                 x86_push_imm (code, cfg->method);
1038                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1039                 x86_call_code (code, 0);
1040         }
1041         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1042
1043         return code;
1044 }
1045
/*
 * Which result register(s) mono_arch_instrument_epilog () below must
 * preserve around its call to the profiler/trace leave hook.
 */
enum {
	SAVE_NONE,	/* void return: nothing live */
	SAVE_STRUCT,	/* valuetype return (hidden return buffer) */
	SAVE_EAX,	/* 32 bit integer/pointer result in eax */
	SAVE_EAX_EDX,	/* 64 bit result in the eax:edx pair */
	SAVE_FP		/* float/double result on the x87 stack */
};
1053
1054 void*
1055 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1056 {
1057         guchar *code = p;
1058         int arg_size = 0, save_mode = SAVE_NONE;
1059         MonoMethod *method = cfg->method;
1060         
1061         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1062         case MONO_TYPE_VOID:
1063                 /* special case string .ctor icall */
1064                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1065                         save_mode = SAVE_EAX;
1066                 else
1067                         save_mode = SAVE_NONE;
1068                 break;
1069         case MONO_TYPE_I8:
1070         case MONO_TYPE_U8:
1071                 save_mode = SAVE_EAX_EDX;
1072                 break;
1073         case MONO_TYPE_R4:
1074         case MONO_TYPE_R8:
1075                 save_mode = SAVE_FP;
1076                 break;
1077         case MONO_TYPE_VALUETYPE:
1078                 save_mode = SAVE_STRUCT;
1079                 break;
1080         default:
1081                 save_mode = SAVE_EAX;
1082                 break;
1083         }
1084
1085         switch (save_mode) {
1086         case SAVE_EAX_EDX:
1087                 x86_push_reg (code, X86_EDX);
1088                 x86_push_reg (code, X86_EAX);
1089                 if (enable_arguments) {
1090                         x86_push_reg (code, X86_EDX);
1091                         x86_push_reg (code, X86_EAX);
1092                         arg_size = 8;
1093                 }
1094                 break;
1095         case SAVE_EAX:
1096                 x86_push_reg (code, X86_EAX);
1097                 if (enable_arguments) {
1098                         x86_push_reg (code, X86_EAX);
1099                         arg_size = 4;
1100                 }
1101                 break;
1102         case SAVE_FP:
1103                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1104                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1105                 if (enable_arguments) {
1106                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1107                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1108                         arg_size = 8;
1109                 }
1110                 break;
1111         case SAVE_STRUCT:
1112                 if (enable_arguments) {
1113                         x86_push_membase (code, X86_EBP, 8);
1114                         arg_size = 4;
1115                 }
1116                 break;
1117         case SAVE_NONE:
1118         default:
1119                 break;
1120         }
1121
1122         if (cfg->compile_aot) {
1123                 x86_push_imm (code, method);
1124                 x86_mov_reg_imm (code, X86_EAX, func);
1125                 x86_call_reg (code, X86_EAX);
1126         } else {
1127                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1128                 x86_push_imm (code, method);
1129                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1130                 x86_call_code (code, 0);
1131         }
1132         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1133
1134         switch (save_mode) {
1135         case SAVE_EAX_EDX:
1136                 x86_pop_reg (code, X86_EAX);
1137                 x86_pop_reg (code, X86_EDX);
1138                 break;
1139         case SAVE_EAX:
1140                 x86_pop_reg (code, X86_EAX);
1141                 break;
1142         case SAVE_FP:
1143                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1144                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1145                 break;
1146         case SAVE_NONE:
1147         default:
1148                 break;
1149         }
1150
1151         return code;
1152 }
1153
/*
 * Emit a conditional branch for 'ins'. If the target (a label when
 * MONO_INST_BRLABEL is set, otherwise a basic block) has already been
 * emitted, branch directly to its native offset; otherwise record a
 * patch (MONO_PATCH_INFO_LABEL / MONO_PATCH_INFO_BB) and emit a branch
 * with a 0 displacement to be back-patched later. With MONO_OPT_BRANCH
 * the short imm8 form is used when the estimated displacement fits.
 * Uses 'cfg', 'code' and 'cpos' from the expansion site.
 * NOTE(review): this expands to a bare if/else, so a call site of the
 * form 'if (x) EMIT_COND_BRANCH (...); else ...' would mis-parse;
 * verify all call sites before wrapping it in do/while (0).
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1178
/* emit an exception if condition is fail */
/* Records a MONO_PATCH_INFO_EXC patch for 'exc_name' and emits a 32 bit
 * conditional branch with a 0 displacement, back-patched later to the
 * exception-throwing code. Uses 'cfg' and 'code' from the expansion site.
 * NOTE(review): the trailing ';' after 'while (0)' defeats the
 * do/while(0) idiom inside an unbraced if/else — verify all call sites
 * before removing it. */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
                x86_branch32 (code, cond, 0, signed);               \
        } while (0); 
1186
/* Emit an x87 compare: fcompp compares (and pops) the two top stack
 * slots, fnstsw stores the FPU status word into AX for the caller to
 * test. NOTE(review): same trailing-';' hazard as the macro above. */
#define EMIT_FPCOMPARE(code) do { \
        x86_fcompp (code); \
        x86_fnstsw (code); \
} while (0); 
1191
1192
1193 static guint8*
1194 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1195 {
1196         if (cfg->compile_aot) {
1197                 guint32 got_reg = X86_EAX;
1198
1199                 if (cfg->compile_aot) {          
1200                         /*
1201                          * Since the patches are generated by the back end, there is
1202                          * no way to generate a got_var at this point.
1203                          */
1204                         g_assert (cfg->got_var);
1205
1206                         if (cfg->got_var->opcode == OP_REGOFFSET)
1207                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1208                         else
1209                                 got_reg = cfg->got_var->dreg;
1210                 }
1211
1212                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1213                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1214         }
1215         else {
1216                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1217                 x86_call_code (code, 0);
1218         }
1219
1220         return code;
1221 }
1222
/* FIXME: Add more instructions */
/* Opcodes whose emitted code does not consume the condition flags; the
 * ICONST->XOR peephole below may only fire when the next instruction is
 * one of these, since XOR clobbers eflags. */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1225
1226 static void
1227 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1228 {
1229         MonoInst *ins, *last_ins = NULL;
1230         ins = bb->code;
1231
1232         while (ins) {
1233
1234                 switch (ins->opcode) {
1235                 case OP_ICONST:
1236                         /* reg = 0 -> XOR (reg, reg) */
1237                         /* XOR sets cflags on x86, so we cant do it always */
1238                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
1239                                 ins->opcode = CEE_XOR;
1240                                 ins->sreg1 = ins->dreg;
1241                                 ins->sreg2 = ins->dreg;
1242                         }
1243                         break;
1244                 case OP_MUL_IMM: 
1245                         /* remove unnecessary multiplication with 1 */
1246                         if (ins->inst_imm == 1) {
1247                                 if (ins->dreg != ins->sreg1) {
1248                                         ins->opcode = OP_MOVE;
1249                                 } else {
1250                                         last_ins->next = ins->next;
1251                                         ins = ins->next;
1252                                         continue;
1253                                 }
1254                         }
1255                         break;
1256                 case OP_COMPARE_IMM:
1257                         /* OP_COMPARE_IMM (reg, 0) 
1258                          * --> 
1259                          * OP_X86_TEST_NULL (reg) 
1260                          */
1261                         if (!ins->inst_imm)
1262                                 ins->opcode = OP_X86_TEST_NULL;
1263                         break;
1264                 case OP_X86_COMPARE_MEMBASE_IMM:
1265                         /* 
1266                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1267                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
1268                          * -->
1269                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
1270                          * OP_COMPARE_IMM reg, imm
1271                          *
1272                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
1273                          */
1274                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
1275                             ins->inst_basereg == last_ins->inst_destbasereg &&
1276                             ins->inst_offset == last_ins->inst_offset) {
1277                                         ins->opcode = OP_COMPARE_IMM;
1278                                         ins->sreg1 = last_ins->sreg1;
1279
1280                                         /* check if we can remove cmp reg,0 with test null */
1281                                         if (!ins->inst_imm)
1282                                                 ins->opcode = OP_X86_TEST_NULL;
1283                                 }
1284
1285                         break;
1286                 case OP_LOAD_MEMBASE:
1287                 case OP_LOADI4_MEMBASE:
1288                         /* 
1289                          * Note: if reg1 = reg2 the load op is removed
1290                          *
1291                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1292                          * OP_LOAD_MEMBASE offset(basereg), reg2
1293                          * -->
1294                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1295                          * OP_MOVE reg1, reg2
1296                          */
1297                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1298                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1299                             ins->inst_basereg == last_ins->inst_destbasereg &&
1300                             ins->inst_offset == last_ins->inst_offset) {
1301                                 if (ins->dreg == last_ins->sreg1) {
1302                                         last_ins->next = ins->next;                             
1303                                         ins = ins->next;                                
1304                                         continue;
1305                                 } else {
1306                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1307                                         ins->opcode = OP_MOVE;
1308                                         ins->sreg1 = last_ins->sreg1;
1309                                 }
1310
1311                         /* 
1312                          * Note: reg1 must be different from the basereg in the second load
1313                          * Note: if reg1 = reg2 is equal then second load is removed
1314                          *
1315                          * OP_LOAD_MEMBASE offset(basereg), reg1
1316                          * OP_LOAD_MEMBASE offset(basereg), reg2
1317                          * -->
1318                          * OP_LOAD_MEMBASE offset(basereg), reg1
1319                          * OP_MOVE reg1, reg2
1320                          */
1321                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1322                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1323                               ins->inst_basereg != last_ins->dreg &&
1324                               ins->inst_basereg == last_ins->inst_basereg &&
1325                               ins->inst_offset == last_ins->inst_offset) {
1326
1327                                 if (ins->dreg == last_ins->dreg) {
1328                                         last_ins->next = ins->next;                             
1329                                         ins = ins->next;                                
1330                                         continue;
1331                                 } else {
1332                                         ins->opcode = OP_MOVE;
1333                                         ins->sreg1 = last_ins->dreg;
1334                                 }
1335
1336                                 //g_assert_not_reached ();
1337
1338 #if 0
1339                         /* 
1340                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1341                          * OP_LOAD_MEMBASE offset(basereg), reg
1342                          * -->
1343                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1344                          * OP_ICONST reg, imm
1345                          */
1346                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1347                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1348                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1349                                    ins->inst_offset == last_ins->inst_offset) {
1350                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1351                                 ins->opcode = OP_ICONST;
1352                                 ins->inst_c0 = last_ins->inst_imm;
1353                                 g_assert_not_reached (); // check this rule
1354 #endif
1355                         }
1356                         break;
1357                 case OP_LOADU1_MEMBASE:
1358                 case OP_LOADI1_MEMBASE:
1359                         /* 
1360                          * Note: if reg1 = reg2 the load op is removed
1361                          *
1362                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1363                          * OP_LOAD_MEMBASE offset(basereg), reg2
1364                          * -->
1365                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1366                          * OP_MOVE reg1, reg2
1367                          */
1368                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1369                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1370                                         ins->inst_offset == last_ins->inst_offset) {
1371                                 if (ins->dreg == last_ins->sreg1) {
1372                                         last_ins->next = ins->next;                             
1373                                         ins = ins->next;                                
1374                                         continue;
1375                                 } else {
1376                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1377                                         ins->opcode = OP_MOVE;
1378                                         ins->sreg1 = last_ins->sreg1;
1379                                 }
1380                         }
1381                         break;
1382                 case OP_LOADU2_MEMBASE:
1383                 case OP_LOADI2_MEMBASE:
1384                         /* 
1385                          * Note: if reg1 = reg2 the load op is removed
1386                          *
1387                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
1388                          * OP_LOAD_MEMBASE offset(basereg), reg2
1389                          * -->
1390                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
1391                          * OP_MOVE reg1, reg2
1392                          */
1393                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1394                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1395                                         ins->inst_offset == last_ins->inst_offset) {
1396                                 if (ins->dreg == last_ins->sreg1) {
1397                                         last_ins->next = ins->next;                             
1398                                         ins = ins->next;                                
1399                                         continue;
1400                                 } else {
1401                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1402                                         ins->opcode = OP_MOVE;
1403                                         ins->sreg1 = last_ins->sreg1;
1404                                 }
1405                         }
1406                         break;
1407                 case CEE_CONV_I4:
1408                 case CEE_CONV_U4:
1409                 case OP_MOVE:
1410                         /*
1411                          * Removes:
1412                          *
1413                          * OP_MOVE reg, reg 
1414                          */
1415                         if (ins->dreg == ins->sreg1) {
1416                                 if (last_ins)
1417                                         last_ins->next = ins->next;                             
1418                                 ins = ins->next;
1419                                 continue;
1420                         }
1421                         /* 
1422                          * Removes:
1423                          *
1424                          * OP_MOVE sreg, dreg 
1425                          * OP_MOVE dreg, sreg
1426                          */
1427                         if (last_ins && last_ins->opcode == OP_MOVE &&
1428                             ins->sreg1 == last_ins->dreg &&
1429                             ins->dreg == last_ins->sreg1) {
1430                                 last_ins->next = ins->next;                             
1431                                 ins = ins->next;                                
1432                                 continue;
1433                         }
1434                         break;
1435                         
1436                 case OP_X86_PUSH_MEMBASE:
1437                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1438                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1439                             ins->inst_basereg == last_ins->inst_destbasereg &&
1440                             ins->inst_offset == last_ins->inst_offset) {
1441                                     ins->opcode = OP_X86_PUSH;
1442                                     ins->sreg1 = last_ins->sreg1;
1443                         }
1444                         break;
1445                 }
1446                 last_ins = ins;
1447                 ins = ins->next;
1448         }
1449         bb->last_ins = last_ins;
1450 }
1451
/* x86 condition codes used when emitting conditional branches: one row
 * of five relational codes per signedness variant, followed by the
 * overflow/carry codes.
 * NOTE(review): the row/column order must match the opcode numbering
 * the emitter indexes this table with — confirm before reordering. */
static const int 
branch_cc_table [] = {
        X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
        X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
        X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
1458
/* Conditionally execute 'a' when the enclosing function's verbosity is
 * high enough. NOTE(review): the macro captures a variable named 'cfg'
 * from the expansion site — every user must have one in scope. */
#define DEBUG(a) if (cfg->verbose_level > 1) a
//#define DEBUG(a)
1461
1462 /*
1463  * returns the offset used by spillvar. It allocates a new
1464  * spill variable if necessary. 
1465  */
1466 static int
1467 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1468 {
1469         MonoSpillInfo **si, *info;
1470         int i = 0;
1471
1472         si = &cfg->spill_info; 
1473         
1474         while (i <= spillvar) {
1475
1476                 if (!*si) {
1477                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1478                         info->next = NULL;
1479                         cfg->stack_offset -= sizeof (gpointer);
1480                         info->offset = cfg->stack_offset;
1481                 }
1482
1483                 if (i == spillvar)
1484                         return (*si)->offset;
1485
1486                 i++;
1487                 si = &(*si)->next;
1488         }
1489
1490         g_assert_not_reached ();
1491         return 0;
1492 }
1493
1494 /*
1495  * returns the offset used by spillvar. It allocates a new
1496  * spill float variable if necessary. 
1497  * (same as mono_spillvar_offset but for float)
1498  */
1499 static int
1500 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1501 {
1502         MonoSpillInfo **si, *info;
1503         int i = 0;
1504
1505         si = &cfg->spill_info_float; 
1506         
1507         while (i <= spillvar) {
1508
1509                 if (!*si) {
1510                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1511                         info->next = NULL;
1512                         cfg->stack_offset -= sizeof (double);
1513                         info->offset = cfg->stack_offset;
1514                 }
1515
1516                 if (i == spillvar)
1517                         return (*si)->offset;
1518
1519                 i++;
1520                 si = &(*si)->next;
1521         }
1522
1523         g_assert_not_reached ();
1524         return 0;
1525 }
1526
1527 /*
1528  * Creates a store for spilled floating point items
1529  */
1530 static MonoInst*
1531 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1532 {
1533         MonoInst *store;
1534         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1535         store->sreg1 = reg;
1536         store->inst_destbasereg = X86_EBP;
1537         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1538
1539         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08x(%%sp)) (from %d)\n", spill, store->inst_offset, reg));
1540         return store;
1541 }
1542
1543 /*
1544  * Creates a load for spilled floating point items 
1545  */
1546 static MonoInst*
1547 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1548 {
1549         MonoInst *load;
1550         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1551         load->dreg = reg;
1552         load->inst_basereg = X86_EBP;
1553         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1554
1555         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08x(%%sp)) (from %d)\n", spill, load->inst_offset, reg));
1556         return load;
1557 }
1558
/* NOTE(review): X86_IS_CALLEE presumably tests membership in the locally
 * allocatable (caller-clobbered) register set — confirm in mini-x86.h. */
#define is_global_ireg(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && !X86_IS_CALLEE ((r)))
#define reg_is_freeable(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && X86_IS_CALLEE ((r)))

/* Per-register liveness info collected by the forward pass of the local
 * register allocator; born_in/killed_in/last_use/prev_use are instruction
 * indices within the basic block. */
typedef struct {
        int born_in;
        int killed_in;
        int last_use;
        int prev_use;
        int flags;              /* used to track fp spill/load */
} RegTrack;

/* Opcode spec table (MONO_INST_DEST/SRC1/SRC2/CLOB columns), from cpu-pentium.h. */
static const char*const * ins_spec = pentium_desc;
1571
1572 static void
1573 print_ins (int i, MonoInst *ins)
1574 {
1575         const char *spec = ins_spec [ins->opcode];
1576         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1577         if (spec [MONO_INST_DEST]) {
1578                 if (ins->dreg >= MONO_MAX_IREGS)
1579                         g_print (" R%d <-", ins->dreg);
1580                 else
1581                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1582         }
1583         if (spec [MONO_INST_SRC1]) {
1584                 if (ins->sreg1 >= MONO_MAX_IREGS)
1585                         g_print (" R%d", ins->sreg1);
1586                 else
1587                         g_print (" %s", mono_arch_regname (ins->sreg1));
1588         }
1589         if (spec [MONO_INST_SRC2]) {
1590                 if (ins->sreg2 >= MONO_MAX_IREGS)
1591                         g_print (" R%d", ins->sreg2);
1592                 else
1593                         g_print (" %s", mono_arch_regname (ins->sreg2));
1594         }
1595         if (spec [MONO_INST_CLOB])
1596                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1597         g_print ("\n");
1598 }
1599
1600 static void
1601 print_regtrack (RegTrack *t, int num)
1602 {
1603         int i;
1604         char buf [32];
1605         const char *r;
1606         
1607         for (i = 0; i < num; ++i) {
1608                 if (!t [i].born_in)
1609                         continue;
1610                 if (i >= MONO_MAX_IREGS) {
1611                         g_snprintf (buf, sizeof(buf), "R%d", i);
1612                         r = buf;
1613                 } else
1614                         r = mono_arch_regname (i);
1615                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1616         }
1617 }
1618
typedef struct InstList InstList;

/* Doubly linked list node used to visit the block's instructions in
 * reverse order during local register allocation. */
struct InstList {
        InstList *prev;
        InstList *next;
        MonoInst *data;
};
1626
1627 static inline InstList*
1628 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1629 {
1630         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1631         item->data = data;
1632         item->prev = NULL;
1633         item->next = list;
1634         if (list)
1635                 list->prev = item;
1636         return item;
1637 }
1638
1639 /*
1640  * Force the spilling of the variable in the symbolic register 'reg'.
1641  */
1642 static int
1643 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1644 {
1645         MonoInst *load;
1646         int i, sel, spill;
1647         
1648         sel = cfg->rs->iassign [reg];
1649         /*i = cfg->rs->isymbolic [sel];
1650         g_assert (i == reg);*/
1651         i = reg;
1652         spill = ++cfg->spill_count;
1653         cfg->rs->iassign [i] = -spill - 1;
1654         mono_regstate_free_int (cfg->rs, sel);
1655         /* we need to create a spill var and insert a load to sel after the current instruction */
1656         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1657         load->dreg = sel;
1658         load->inst_basereg = X86_EBP;
1659         load->inst_offset = mono_spillvar_offset (cfg, spill);
1660         if (item->prev) {
1661                 while (ins->next != item->prev->data)
1662                         ins = ins->next;
1663         }
1664         load->next = ins->next;
1665         ins->next = load;
1666         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1667         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1668         g_assert (i == sel);
1669
1670         return sel;
1671 }
1672
/*
 * Free up a hard register from 'regmask' for symbolic register 'reg' by
 * spilling whatever symbolic register currently occupies it.  Registers
 * used by the current instruction 'ins' are excluded from the candidates.
 * Returns the freed hard register; a reload of the spilled value is
 * inserted after the current instruction.
 */
static int
get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
{
        MonoInst *load;
        int i, sel, spill;

        DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
        /* exclude the registers in the current instruction */
        if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
                if (ins->sreg1 >= MONO_MAX_IREGS)
                        regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
                else
                        regmask &= ~ (1 << ins->sreg1);
                DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
        }
        if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
                if (ins->sreg2 >= MONO_MAX_IREGS)
                        regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
                else
                        regmask &= ~ (1 << ins->sreg2);
                DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
        }
        if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
                regmask &= ~ (1 << ins->dreg);
                DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
        }

        DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
        g_assert (regmask); /* need at least a register we can free */
        sel = -1;
        /* we should track prev_use and spill the register that's farther */
        for (i = 0; i < MONO_MAX_IREGS; ++i) {
                if (regmask & (1 << i)) {
                        sel = i;
                        DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
                        break;
                }
        }
        /* 'i' becomes the symbolic register evicted from 'sel' */
        i = cfg->rs->isymbolic [sel];
        /* mark it as spilled: a negative assignment encodes the spill slot */
        spill = ++cfg->spill_count;
        cfg->rs->iassign [i] = -spill - 1;
        mono_regstate_free_int (cfg->rs, sel);
        /* we need to create a spill var and insert a load to sel after the current instruction */
        MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
        load->dreg = sel;
        load->inst_basereg = X86_EBP;
        load->inst_offset = mono_spillvar_offset (cfg, spill);
        if (item->prev) {
                /* item->prev->data follows the current instruction in program
                 * order (the InstList is reversed); advance 'ins' so the
                 * reload is linked just before it. */
                while (ins->next != item->prev->data)
                        ins = ins->next;
        }
        load->next = ins->next;
        ins->next = load;
        DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
        /* re-reserve 'sel' so the caller can hand it to the current ins */
        i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
        g_assert (i == sel);
        
        return sel;
}
1732
1733 static MonoInst*
1734 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1735 {
1736         MonoInst *copy;
1737         MONO_INST_NEW (cfg, copy, OP_MOVE);
1738         copy->dreg = dest;
1739         copy->sreg1 = src;
1740         if (ins) {
1741                 copy->next = ins->next;
1742                 ins->next = copy;
1743         }
1744         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1745         return copy;
1746 }
1747
1748 static MonoInst*
1749 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1750 {
1751         MonoInst *store;
1752         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1753         store->sreg1 = reg;
1754         store->inst_destbasereg = X86_EBP;
1755         store->inst_offset = mono_spillvar_offset (cfg, spill);
1756         if (ins) {
1757                 store->next = ins->next;
1758                 ins->next = store;
1759         }
1760         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1761         return store;
1762 }
1763
/*
 * Link 'to_insert' into the instruction stream immediately before 'ins'.
 * 'item' is the InstList node (reversed order) corresponding to 'ins'.
 */
static void
insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
{
        MonoInst *prev;
        if (item->next) {
                /* item->next->data precedes 'ins' in program order (the
                 * InstList is reversed); walk forward to ins' predecessor. */
                prev = item->next->data;

                while (prev->next != ins)
                        prev = prev->next;
                to_insert->next = ins;
                prev->next = to_insert;
        } else {
                /* 'ins' is the first instruction processed; no predecessor to
                 * relink.  NOTE(review): relies on item->data update below so
                 * later insertions land after 'to_insert' — confirm callers. */
                to_insert->next = ins;
        }
        /* 
         * needed otherwise in the next instruction we can add an ins to the 
         * end and that would get past this instruction.
         */
        item->data = to_insert; 
}
1784
1785
#if  0
/* Dead code (disabled): older register-assignment helper, superseded by
 * mono_x86_alloc_int_reg below.  Kept for reference only. */
static int
alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
{
        int val = cfg->rs->iassign [sym_reg];
        if (val < 0) {
                int spill = 0;
                if (val < -1) {
                        /* the register gets spilled after this inst */
                        spill = -val -1;
                }
                val = mono_regstate_alloc_int (cfg->rs, allow_mask);
                if (val < 0)
                        val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
                cfg->rs->iassign [sym_reg] = val;
                /* add option to store before the instruction for src registers */
                if (spill)
                        create_spilled_store (cfg, spill, val, sym_reg, ins);
        }
        cfg->rs->isymbolic [val] = sym_reg;
        return val;
}
#endif
1809
/* flags used in reginfo->flags */
enum {
        /* fp-stack bookkeeping, set by the liveness pass of
         * mono_arch_local_regalloc when the x87 stack overflows */
        MONO_X86_FP_NEEDS_LOAD_SPILL    = 1 << 0,
        MONO_X86_FP_NEEDS_SPILL                 = 1 << 1,
        MONO_X86_FP_NEEDS_LOAD                  = 1 << 2,
        /* hard-register hints consumed by mono_x86_alloc_int_reg */
        MONO_X86_REG_NOT_ECX                    = 1 << 3,       /* shift source: avoid ECX */
        MONO_X86_REG_EAX                                = 1 << 4,       /* long low word: prefer EAX */
        MONO_X86_REG_EDX                                = 1 << 5,       /* long high word: prefer EDX */
        MONO_X86_REG_ECX                                = 1 << 6        /* shift count: prefer ECX */
};
1820
1821 static int
1822 mono_x86_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1823 {
1824         int val;
1825         int test_mask = dest_mask;
1826
1827         if (flags & MONO_X86_REG_EAX)
1828                 test_mask &= (1 << X86_EAX);
1829         else if (flags & MONO_X86_REG_EDX)
1830                 test_mask &= (1 << X86_EDX);
1831         else if (flags & MONO_X86_REG_ECX)
1832                 test_mask &= (1 << X86_ECX);
1833         else if (flags & MONO_X86_REG_NOT_ECX)
1834                 test_mask &= ~ (1 << X86_ECX);
1835
1836         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1837         if (val >= 0 && test_mask != dest_mask)
1838                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1839
1840         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1841                 DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but exluding ECX\n", test_mask));
1842                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & (~1 << X86_ECX)));
1843         }
1844
1845         if (val < 0) {
1846                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1847                 if (val < 0)
1848                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1849         }
1850
1851         return val;
1852 }
1853
1854 static inline void
1855 assign_ireg (MonoRegState *rs, int reg, int hreg)
1856 {
1857         g_assert (reg >= MONO_MAX_IREGS);
1858         g_assert (hreg < MONO_MAX_IREGS);
1859         g_assert (! is_global_ireg (hreg));
1860
1861         rs->iassign [reg] = hreg;
1862         rs->isymbolic [hreg] = reg;
1863         rs->ifree_mask &= ~ (1 << hreg);
1864 }
1865
1866 /*#include "cprop.c"*/
1867
1868 /*
1869  * Local register allocation.
1870  * We first scan the list of instructions and we save the liveness info of
1871  * each register (when the register is first used, when it's value is set etc.).
1872  * We also reverse the list of instructions (in the InstList list) because assigning
1873  * registers backwards allows for more tricks to be used.
1874  */
1875 void
1876 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1877 {
1878         MonoInst *ins;
1879         MonoRegState *rs = cfg->rs;
1880         int i, val, fpcount;
1881         RegTrack *reginfo, *reginfof;
1882         RegTrack *reginfo1, *reginfo2, *reginfod;
1883         InstList *tmp, *reversed = NULL;
1884         const char *spec;
1885         guint32 src1_mask, src2_mask, dest_mask;
1886         GList *fspill_list = NULL;
1887         int fspill = 0;
1888
1889         if (!bb->code)
1890                 return;
1891         rs->next_vireg = bb->max_ireg;
1892         rs->next_vfreg = bb->max_freg;
1893         mono_regstate_assign (rs);
1894         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1895         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1896         rs->ifree_mask = X86_CALLEE_REGS;
1897
1898         ins = bb->code;
1899
1900         /*if (cfg->opt & MONO_OPT_COPYPROP)
1901                 local_copy_prop (cfg, ins);*/
1902
1903         i = 1;
1904         fpcount = 0;
1905         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1906         /* forward pass on the instructions to collect register liveness info */
1907         while (ins) {
1908                 spec = ins_spec [ins->opcode];
1909                 
1910                 DEBUG (print_ins (i, ins));
1911
1912                 if (spec [MONO_INST_SRC1]) {
1913                         if (spec [MONO_INST_SRC1] == 'f') {
1914                                 GList *spill;
1915                                 reginfo1 = reginfof;
1916
1917                                 spill = g_list_first (fspill_list);
1918                                 if (spill && fpcount < MONO_MAX_FREGS) {
1919                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1920                                         fspill_list = g_list_remove (fspill_list, spill->data);
1921                                 } else
1922                                         fpcount--;
1923                         }
1924                         else
1925                                 reginfo1 = reginfo;
1926                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1927                         reginfo1 [ins->sreg1].last_use = i;
1928                         if (spec [MONO_INST_SRC1] == 'L') {
1929                                 /* The virtual register is allocated sequentially */
1930                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1931                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1932                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1933                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1934
1935                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1936                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1937                         }
1938                 } else {
1939                         ins->sreg1 = -1;
1940                 }
1941                 if (spec [MONO_INST_SRC2]) {
1942                         if (spec [MONO_INST_SRC2] == 'f') {
1943                                 GList *spill;
1944                                 reginfo2 = reginfof;
1945                                 spill = g_list_first (fspill_list);
1946                                 if (spill) {
1947                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1948                                         fspill_list = g_list_remove (fspill_list, spill->data);
1949                                         if (fpcount >= MONO_MAX_FREGS) {
1950                                                 fspill++;
1951                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1952                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1953                                         }
1954                                 } else
1955                                         fpcount--;
1956                         }
1957                         else
1958                                 reginfo2 = reginfo;
1959                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1960                         reginfo2 [ins->sreg2].last_use = i;
1961                         if (spec [MONO_INST_SRC2] == 'L') {
1962                                 /* The virtual register is allocated sequentially */
1963                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1964                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1965                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1966                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1967                         }
1968                         if (spec [MONO_INST_CLOB] == 's') {
1969                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1970                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1971                         }
1972                 } else {
1973                         ins->sreg2 = -1;
1974                 }
1975                 if (spec [MONO_INST_DEST]) {
1976                         if (spec [MONO_INST_DEST] == 'f') {
1977                                 reginfod = reginfof;
1978                                 if (fpcount >= MONO_MAX_FREGS) {
1979                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1980                                         fspill++;
1981                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1982                                         fpcount--;
1983                                 }
1984                                 fpcount++;
1985                         }
1986                         else
1987                                 reginfod = reginfo;
1988                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1989                                 reginfod [ins->dreg].killed_in = i;
1990                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1991                         reginfod [ins->dreg].last_use = i;
1992                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1993                                 reginfod [ins->dreg].born_in = i;
1994                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
1995                                 /* The virtual register is allocated sequentially */
1996                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1997                                 reginfod [ins->dreg + 1].last_use = i;
1998                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1999                                         reginfod [ins->dreg + 1].born_in = i;
2000
2001                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
2002                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
2003                         }
2004                 } else {
2005                         ins->dreg = -1;
2006                 }
2007
2008                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
2009                 ++i;
2010                 ins = ins->next;
2011         }
2012
2013         // todo: check if we have anything left on fp stack, in verify mode?
2014         fspill = 0;
2015
2016         DEBUG (print_regtrack (reginfo, rs->next_vireg));
2017         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
2018         tmp = reversed;
2019         while (tmp) {
2020                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
2021                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
2022                 --i;
2023                 ins = tmp->data;
2024                 spec = ins_spec [ins->opcode];
2025                 prev_dreg = -1;
2026                 clob_dreg = -1;
2027                 DEBUG (g_print ("processing:"));
2028                 DEBUG (print_ins (i, ins));
2029                 if (spec [MONO_INST_CLOB] == 's') {
2030                         /*
2031                          * Shift opcodes, SREG2 must be RCX
2032                          */
2033                         if (rs->ifree_mask & (1 << X86_ECX)) {
2034                                 if (ins->sreg2 < MONO_MAX_IREGS) {
2035                                         /* Argument already in hard reg, need to copy */
2036                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2037                                         insert_before_ins (ins, tmp, copy);
2038                                 }
2039                                 else {
2040                                         DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
2041                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2042                                 }
2043                         } else {
2044                                 int need_ecx_spill = TRUE;
2045                                 /* 
2046                                  * we first check if src1/dreg is already assigned a register
2047                                  * and then we force a spill of the var assigned to ECX.
2048                                  */
2049                                 /* the destination register can't be ECX */
2050                                 dest_mask &= ~ (1 << X86_ECX);
2051                                 src1_mask &= ~ (1 << X86_ECX);
2052                                 val = rs->iassign [ins->dreg];
2053                                 /* 
2054                                  * the destination register is already assigned to ECX:
2055                                  * we need to allocate another register for it and then
2056                                  * copy from this to ECX.
2057                                  */
2058                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
2059                                         int new_dest;
2060                                         new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2061                                         g_assert (new_dest >= 0);
2062                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
2063
2064                                         rs->isymbolic [new_dest] = ins->dreg;
2065                                         rs->iassign [ins->dreg] = new_dest;
2066                                         clob_dreg = ins->dreg;
2067                                         ins->dreg = new_dest;
2068                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
2069                                         need_ecx_spill = FALSE;
2070                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
2071                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
2072                                         rs->iassign [ins->dreg] = val;
2073                                         rs->isymbolic [val] = prev_dreg;
2074                                         ins->dreg = val;*/
2075                                 }
2076                                 if (is_global_ireg (ins->sreg2)) {
2077                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2078                                         insert_before_ins (ins, tmp, copy);
2079                                 }
2080                                 else {
2081                                         val = rs->iassign [ins->sreg2];
2082                                         if (val >= 0 && val != X86_ECX) {
2083                                                 MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
2084                                                 DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
2085                                                 move->next = ins;
2086                                                 g_assert_not_reached ();
2087                                                 /* FIXME: where is move connected to the instruction list? */
2088                                                 //tmp->prev->data->next = move;
2089                                         }
2090                                         else {
2091                                                 if (val == X86_ECX)
2092                                                 need_ecx_spill = FALSE;
2093                                         }
2094                                 }
2095                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
2096                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
2097                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
2098                                         mono_regstate_free_int (rs, X86_ECX);
2099                                 }
2100                                 if (!is_global_ireg (ins->sreg2))
2101                                         /* force-set sreg2 */
2102                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2103                         }
2104                         ins->sreg2 = X86_ECX;
2105                 } else if (spec [MONO_INST_CLOB] == 'd') {
2106                         /*
2107                          * DIVISION/REMAINER
2108                          */
2109                         int dest_reg = X86_EAX;
2110                         int clob_reg = X86_EDX;
2111                         if (spec [MONO_INST_DEST] == 'd') {
2112                                 dest_reg = X86_EDX; /* reminder */
2113                                 clob_reg = X86_EAX;
2114                         }
2115                         if (is_global_ireg (ins->dreg))
2116                                 val = ins->dreg;
2117                         else
2118                                 val = rs->iassign [ins->dreg];
2119                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
2120                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2121                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2122                                 mono_regstate_free_int (rs, dest_reg);
2123                         }
2124                         if (val < 0) {
2125                                 if (val < -1) {
2126                                         /* the register gets spilled after this inst */
2127                                         int spill = -val -1;
2128                                         dest_mask = 1 << dest_reg;
2129                                         prev_dreg = ins->dreg;
2130                                         val = mono_regstate_alloc_int (rs, dest_mask);
2131                                         if (val < 0)
2132                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
2133                                         rs->iassign [ins->dreg] = val;
2134                                         if (spill)
2135                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
2136                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2137                                         rs->isymbolic [val] = prev_dreg;
2138                                         ins->dreg = val;
2139                                 } else {
2140                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
2141                                         prev_dreg = ins->dreg;
2142                                         assign_ireg (rs, ins->dreg, dest_reg);
2143                                         ins->dreg = dest_reg;
2144                                         val = dest_reg;
2145                                 }
2146                         }
2147
2148                         //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
2149                         if (val != dest_reg) { /* force a copy */
2150                                 create_copy_ins (cfg, val, dest_reg, ins);
2151                                 if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
2152                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2153                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2154                                         mono_regstate_free_int (rs, dest_reg);
2155                                 }
2156                         }
2157                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
2158                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2159                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2160                                 mono_regstate_free_int (rs, clob_reg);
2161                         }
2162                         src1_mask = 1 << X86_EAX;
2163                         src2_mask = 1 << X86_ECX;
2164                 } else if (spec [MONO_INST_DEST] == 'l') {
2165                         int hreg;
2166                         val = rs->iassign [ins->dreg];
2167                         /* check special case when dreg have been moved from ecx (clob shift) */
2168                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2169                                 hreg = clob_dreg + 1;
2170                         else
2171                                 hreg = ins->dreg + 1;
2172
2173                         /* base prev_dreg on fixed hreg, handle clob case */
2174                         val = hreg - 1;
2175
2176                         if (val != rs->isymbolic [X86_EAX] && !(rs->ifree_mask & (1 << X86_EAX))) {
2177                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [X86_EAX]));
2178                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2179                                 mono_regstate_free_int (rs, X86_EAX);
2180                         }
2181                         if (hreg != rs->isymbolic [X86_EDX] && !(rs->ifree_mask & (1 << X86_EDX))) {
2182                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [X86_EDX]));
2183                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
2184                                 mono_regstate_free_int (rs, X86_EDX);
2185                         }
2186                 } else if (spec [MONO_INST_CLOB] == 'b') {
2187                         /*
2188                          * x86_set_reg instructions, dreg needs to be EAX..EDX
2189                          */     
2190                         dest_mask = (1 << X86_EAX) | (1 << X86_EBX) | (1 << X86_ECX) | (1 << X86_EDX);
2191                         if ((ins->dreg < MONO_MAX_IREGS) && (! (dest_mask & (1 << ins->dreg)))) {
2192                                 /* 
2193                                  * ins->dreg is already a hard reg, need to allocate another
2194                                  * suitable hard reg and make a copy.
2195                                  */
2196                                 int new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2197                                 g_assert (new_dest >= 0);
2198
2199                                 create_copy_ins (cfg, ins->dreg, new_dest, ins);
2200                                 DEBUG (g_print ("\tclob:b changing dreg R%d to %s\n", ins->dreg, mono_arch_regname (new_dest)));
2201                                 ins->dreg = new_dest;
2202
2203                                 /* The hard reg is no longer needed */
2204                                 mono_regstate_free_int (rs, new_dest);
2205                         }
2206                 }
2207
2208                 /*
2209                  * TRACK DREG
2210                  */
2211                 if (spec [MONO_INST_DEST] == 'f') {
2212                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
2213                                 GList *spill_node;
2214                                 MonoInst *store;
2215                                 spill_node = g_list_first (fspill_list);
2216                                 g_assert (spill_node);
2217
2218                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
2219                                 insert_before_ins (ins, tmp, store);
2220                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
2221                                 fspill--;
2222                         }
2223                 } else if (spec [MONO_INST_DEST] == 'L') {
2224                         int hreg;
2225                         val = rs->iassign [ins->dreg];
2226                         /* check special case when dreg have been moved from ecx (clob shift) */
2227                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2228                                 hreg = clob_dreg + 1;
2229                         else
2230                                 hreg = ins->dreg + 1;
2231
2232                         /* base prev_dreg on fixed hreg, handle clob case */
2233                         prev_dreg = hreg - 1;
2234
2235                         if (val < 0) {
2236                                 int spill = 0;
2237                                 if (val < -1) {
2238                                         /* the register gets spilled after this inst */
2239                                         spill = -val -1;
2240                                 }
2241                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2242                                 rs->iassign [ins->dreg] = val;
2243                                 if (spill)
2244                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2245                         }
2246
2247                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
2248  
2249                         rs->isymbolic [val] = hreg - 1;
2250                         ins->dreg = val;
2251                         
2252                         val = rs->iassign [hreg];
2253                         if (val < 0) {
2254                                 int spill = 0;
2255                                 if (val < -1) {
2256                                         /* the register gets spilled after this inst */
2257                                         spill = -val -1;
2258                                 }
2259                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2260                                 rs->iassign [hreg] = val;
2261                                 if (spill)
2262                                         create_spilled_store (cfg, spill, val, hreg, ins);
2263                         }
2264
2265                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
2266                         rs->isymbolic [val] = hreg;
2267                         /* save reg allocating into unused */
2268                         ins->unused = val;
2269
2270                         /* check if we can free our long reg */
2271                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2272                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
2273                                 mono_regstate_free_int (rs, val);
2274                         }
2275                 }
2276                 else if (ins->dreg >= MONO_MAX_IREGS) {
2277                         int hreg;
2278                         val = rs->iassign [ins->dreg];
2279                         if (spec [MONO_INST_DEST] == 'l') {
2280                                 /* check special case when dreg have been moved from ecx (clob shift) */
2281                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2282                                         hreg = clob_dreg + 1;
2283                                 else
2284                                         hreg = ins->dreg + 1;
2285
2286                                 /* base prev_dreg on fixed hreg, handle clob case */
2287                                 prev_dreg = hreg - 1;
2288                         } else
2289                                 prev_dreg = ins->dreg;
2290
2291                         if (val < 0) {
2292                                 int spill = 0;
2293                                 if (val < -1) {
2294                                         /* the register gets spilled after this inst */
2295                                         spill = -val -1;
2296                                 }
2297                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2298                                 rs->iassign [ins->dreg] = val;
2299                                 if (spill)
2300                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2301                         }
2302                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2303                         rs->isymbolic [val] = prev_dreg;
2304                         ins->dreg = val;
2305                         /* handle cases where lreg needs to be eax:edx */
2306                         if (spec [MONO_INST_DEST] == 'l') {
2307                                 /* check special case when dreg have been moved from ecx (clob shift) */
2308                                 int hreg = prev_dreg + 1;
2309                                 val = rs->iassign [hreg];
2310                                 if (val < 0) {
2311                                         int spill = 0;
2312                                         if (val < -1) {
2313                                                 /* the register gets spilled after this inst */
2314                                                 spill = -val -1;
2315                                         }
2316                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2317                                         rs->iassign [hreg] = val;
2318                                         if (spill)
2319                                                 create_spilled_store (cfg, spill, val, hreg, ins);
2320                                 }
2321                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
2322                                 rs->isymbolic [val] = hreg;
2323                                 if (ins->dreg == X86_EAX) {
2324                                         if (val != X86_EDX)
2325                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2326                                 } else if (ins->dreg == X86_EDX) {
2327                                         if (val == X86_EAX) {
2328                                                 /* swap */
2329                                                 g_assert_not_reached ();
2330                                         } else {
2331                                                 /* two forced copies */
2332                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2333                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2334                                         }
2335                                 } else {
2336                                         if (val == X86_EDX) {
2337                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2338                                         } else {
2339                                                 /* two forced copies */
2340                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2341                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2342                                         }
2343                                 }
2344                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2345                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
2346                                         mono_regstate_free_int (rs, val);
2347                                 }
2348                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
2349                                 /* this instruction only outputs to EAX, need to copy */
2350                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2351                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
2352                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
2353                         }
2354                 }
2355                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
2356                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
2357                         mono_regstate_free_int (rs, ins->dreg);
2358                 }
2359                 /* put src1 in EAX if it needs to be */
2360                 if (spec [MONO_INST_SRC1] == 'a') {
2361                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
2362                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
2363                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2364                                 mono_regstate_free_int (rs, X86_EAX);
2365                         }
2366                         if (ins->sreg1 < MONO_MAX_IREGS) {
2367                                 /* The argument is already in a hard reg, need to copy */
2368                                 MonoInst *copy = create_copy_ins (cfg, X86_EAX, ins->sreg1, NULL);
2369                                 insert_before_ins (ins, tmp, copy);
2370                         }
2371                         else
2372                                 /* force-set sreg1 */
2373                                 assign_ireg (rs, ins->sreg1, X86_EAX);
2374                         ins->sreg1 = X86_EAX;
2375                 }
2376
2377                 /*
2378                  * TRACK SREG1
2379                  */
2380                 if (spec [MONO_INST_SRC1] == 'f') {
2381                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
2382                                 MonoInst *load;
2383                                 MonoInst *store = NULL;
2384
2385                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2386                                         GList *spill_node;
2387                                         spill_node = g_list_first (fspill_list);
2388                                         g_assert (spill_node);
2389
2390                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
2391                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2392                                 }
2393
2394                                 fspill++;
2395                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2396                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
2397                                 insert_before_ins (ins, tmp, load);
2398                                 if (store) 
2399                                         insert_before_ins (load, tmp, store);
2400                         }
2401                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
2402                         /* force source to be same as dest */
2403                         assign_ireg (rs, ins->sreg1, ins->dreg);
2404                         assign_ireg (rs, ins->sreg1 + 1, ins->unused);
2405
2406                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
2407                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
2408
2409                         ins->sreg1 = ins->dreg;
2410                         /* 
2411                          * No need for saving the reg, we know that src1=dest in this cases
2412                          * ins->inst_c0 = ins->unused;
2413                          */
2414                 }
2415                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
2416                         val = rs->iassign [ins->sreg1];
2417                         prev_sreg1 = ins->sreg1;
2418                         if (val < 0) {
2419                                 int spill = 0;
2420                                 if (val < -1) {
2421                                         /* the register gets spilled after this inst */
2422                                         spill = -val -1;
2423                                 }
2424                                 if (0 && ins->opcode == OP_MOVE) {
2425                                         /* 
2426                                          * small optimization: the dest register is already allocated
2427                                          * but the src one is not: we can simply assign the same register
2428                                          * here and peephole will get rid of the instruction later.
2429                                          * This optimization may interfere with the clobbering handling:
2430                                          * it removes a mov operation that will be added again to handle clobbering.
2431                                          * There are also some other issues that should with make testjit.
2432                                          */
2433                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2434                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2435                                         //g_assert (val >= 0);
2436                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2437                                 } else {
2438                                         //g_assert (val == -1); /* source cannot be spilled */
2439                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2440                                         rs->iassign [ins->sreg1] = val;
2441                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2442                                 }
2443                                 if (spill) {
2444                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2445                                         insert_before_ins (ins, tmp, store);
2446                                 }
2447                         }
2448                         rs->isymbolic [val] = prev_sreg1;
2449                         ins->sreg1 = val;
2450                 } else {
2451                         prev_sreg1 = -1;
2452                 }
2453                 /* handle clobbering of sreg1 */
2454                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2455                         MonoInst *sreg2_copy = NULL;
2456                         MonoInst *copy = NULL;
2457
2458                         if (ins->dreg == ins->sreg2) {
2459                                 /* 
2460                                  * copying sreg1 to dreg could clobber sreg2, so allocate a new
2461                                  * register for it.
2462                                  */
2463                                 int reg2 = 0;
2464
2465                                 reg2 = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->sreg2, 0);
2466
2467                                 DEBUG (g_print ("\tneed to copy sreg2 %s to reg %s\n", mono_arch_regname (ins->sreg2), mono_arch_regname (reg2)));
2468                                 sreg2_copy = create_copy_ins (cfg, reg2, ins->sreg2, NULL);
2469                                 prev_sreg2 = ins->sreg2 = reg2;
2470
2471                                 mono_regstate_free_int (rs, reg2);
2472                         }
2473
2474                         copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2475                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2476                         insert_before_ins (ins, tmp, copy);
2477
2478                         if (sreg2_copy)
2479                                 insert_before_ins (copy, tmp, sreg2_copy);
2480
2481                         /*
2482                          * Need to prevent sreg2 to be allocated to sreg1, since that
2483                          * would screw up the previous copy.
2484                          */
2485                         src2_mask &= ~ (1 << ins->sreg1);
2486                         /* we set sreg1 to dest as well */
2487                         prev_sreg1 = ins->sreg1 = ins->dreg;
2488                         src2_mask &= ~ (1 << ins->dreg);
2489                 }
2490
2491                 /*
2492                  * TRACK SREG2
2493                  */
2494                 if (spec [MONO_INST_SRC2] == 'f') {
2495                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2496                                 MonoInst *load;
2497                                 MonoInst *store = NULL;
2498
2499                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2500                                         GList *spill_node;
2501
2502                                         spill_node = g_list_first (fspill_list);
2503                                         g_assert (spill_node);
2504                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2505                                                 spill_node = g_list_next (spill_node);
2506         
2507                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2508                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2509                                 } 
2510                                 
2511                                 fspill++;
2512                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2513                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2514                                 insert_before_ins (ins, tmp, load);
2515                                 if (store) 
2516                                         insert_before_ins (load, tmp, store);
2517                         }
2518                 } 
2519                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2520                         val = rs->iassign [ins->sreg2];
2521                         prev_sreg2 = ins->sreg2;
2522                         if (val < 0) {
2523                                 int spill = 0;
2524                                 if (val < -1) {
2525                                         /* the register gets spilled after this inst */
2526                                         spill = -val -1;
2527                                 }
2528                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2529                                 rs->iassign [ins->sreg2] = val;
2530                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2531                                 if (spill)
2532                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2533                         }
2534                         rs->isymbolic [val] = prev_sreg2;
2535                         ins->sreg2 = val;
2536                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
2537                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
2538                         }
2539                 } else {
2540                         prev_sreg2 = -1;
2541                 }
2542
2543                 if (spec [MONO_INST_CLOB] == 'c') {
2544                         int j, s;
2545                         guint32 clob_mask = X86_CALLEE_REGS;
2546                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2547                                 s = 1 << j;
2548                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2549                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2550                                 }
2551                         }
2552                 }
2553                 if (spec [MONO_INST_CLOB] == 'a') {
2554                         guint32 clob_reg = X86_EAX;
2555                         if (!(rs->ifree_mask & (1 << clob_reg)) && (rs->isymbolic [clob_reg] >= 8)) {
2556                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2557                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2558                                 mono_regstate_free_int (rs, clob_reg);
2559                         }
2560                 }
2561                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2562                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2563                         mono_regstate_free_int (rs, ins->sreg1);
2564                 }
2565                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2566                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2567                         mono_regstate_free_int (rs, ins->sreg2);
2568                 }*/
2569         
2570                 //DEBUG (print_ins (i, ins));
2571                 /* this may result from a insert_before call */
2572                 if (!tmp->next)
2573                         bb->code = tmp->data;
2574                 tmp = tmp->next;
2575         }
2576
2577         g_free (reginfo);
2578         g_free (reginfof);
2579         g_list_free (fspill_list);
2580 }
2581
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
	/*
	 * Emit code that converts the value on top of the x87 FP stack to an
	 * integer of 'size' bytes (1, 2, 4 or 8), leaving the result in 'dreg'.
	 * CIL conv.* requires truncation (round toward zero), but fistp honours
	 * the FPU rounding mode, so we temporarily set the control word's
	 * rounding-control bits to "truncate" and restore it afterwards.
	 * 'dreg' is used as a scratch register while patching the control word.
	 * Returns the updated code pointer.
	 */
	/* reserve 4 bytes: saved control word at [esp+0], patched copy at [esp+2] */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	/* RC field (bits 10-11) = 11b -> round toward zero */
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
		/* NOTE(review): with only one pop, 4 bytes of the qword result are
		 * left on the stack, so the fldcw/add below operate 4 bytes off and
		 * the stack ends up unbalanced — this path looks broken until the
		 * high-register pop above is implemented; confirm callers never use
		 * size == 8. */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4; fistp below overwrites the slot
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* restore the original FPU control word and release the scratch slot */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	/* fistp always stores at least 16 bits; narrow to 1 or 2 bytes with the
	 * requested signedness */
	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
2612
2613 static unsigned char*
2614 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2615 {
2616         int sreg = tree->sreg1;
2617         int need_touch = FALSE;
2618
2619 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
2620         need_touch = TRUE;
2621 #endif
2622
2623         if (need_touch) {
2624                 guint8* br[5];
2625
2626                 /*
2627                  * Under Windows:
2628                  * If requested stack size is larger than one page,
2629                  * perform stack-touch operation
2630                  */
2631                 /*
2632                  * Generate stack probe code.
2633                  * Under Windows, it is necessary to allocate one page at a time,
2634                  * "touching" stack after each successful sub-allocation. This is
2635                  * because of the way stack growth is implemented - there is a
2636                  * guard page before the lowest stack page that is currently commited.
2637                  * Stack normally grows sequentially so OS traps access to the
2638                  * guard page and commits more pages when needed.
2639                  */
2640                 x86_test_reg_imm (code, sreg, ~0xFFF);
2641                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2642
2643                 br[2] = code; /* loop */
2644                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
2645                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
2646                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2647                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2648                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2649                 x86_patch (br[3], br[2]);
2650                 x86_test_reg_reg (code, sreg, sreg);
2651                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2652                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2653
2654                 br[1] = code; x86_jump8 (code, 0);
2655
2656                 x86_patch (br[0], code);
2657                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2658                 x86_patch (br[1], code);
2659                 x86_patch (br[4], code);
2660         }
2661         else
2662                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
2663
2664         if (tree->flags & MONO_INST_INIT) {
2665                 int offset = 0;
2666                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
2667                         x86_push_reg (code, X86_EAX);
2668                         offset += 4;
2669                 }
2670                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
2671                         x86_push_reg (code, X86_ECX);
2672                         offset += 4;
2673                 }
2674                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
2675                         x86_push_reg (code, X86_EDI);
2676                         offset += 4;
2677                 }
2678                 
2679                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
2680                 if (sreg != X86_ECX)
2681                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
2682                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
2683                                 
2684                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
2685                 x86_cld (code);
2686                 x86_prefix (code, X86_REP_PREFIX);
2687                 x86_stosl (code);
2688                 
2689                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
2690                         x86_pop_reg (code, X86_EDI);
2691                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
2692                         x86_pop_reg (code, X86_ECX);
2693                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
2694                         x86_pop_reg (code, X86_EAX);
2695         }
2696         return code;
2697 }
2698
2699
2700 static guint8*
2701 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2702 {
2703         CallInfo *cinfo;
2704         int quad;
2705
2706         /* Move return value to the target register */
2707         switch (ins->opcode) {
2708         case CEE_CALL:
2709         case OP_CALL_REG:
2710         case OP_CALL_MEMBASE:
2711                 if (ins->dreg != X86_EAX)
2712                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2713                 break;
2714         case OP_VCALL:
2715         case OP_VCALL_REG:
2716         case OP_VCALL_MEMBASE:
2717                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
2718                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2719                         /* Pop the destination address from the stack */
2720                         x86_pop_reg (code, X86_ECX);
2721                         
2722                         for (quad = 0; quad < 2; quad ++) {
2723                                 switch (cinfo->ret.pair_storage [quad]) {
2724                                 case ArgInIReg:
2725                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
2726                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
2727                                         break;
2728                                 case ArgNone:
2729                                         break;
2730                                 default:
2731                                         g_assert_not_reached ();
2732                                 }
2733                         }
2734                 }
2735                 g_free (cinfo);
2736         default:
2737                 break;
2738         }
2739
2740         return code;
2741 }
2742
/*
 * emit_tls_get:
 *
 *   Emit code loading the 32 bit TLS slot TLS_OFFSET into register DREG.
 * On Windows this open-codes TlsGetValue () by going through the TEB
 * (reachable via the FS segment); elsewhere the offset is read directly
 * relative to the GS segment. Returns the updated native code pointer.
 */
static guint8*
emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/* 
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	/* Only the first 64 inline TLS slots of the TEB are supported here */
	g_assert (tls_offset < 64);
	x86_prefix (code, X86_FS_PREFIX);
	/* NOTE(review): fs:[0x18] is presumably the TEB self pointer — confirm against TIB layout */
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	/* 3600 (0xE10) looks like the TlsSlots array offset within the TEB — TODO confirm */
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	/* On non-Windows x86, TLS variables are addressed directly off GS */
	x86_prefix (code, X86_GS_PREFIX);
	x86_mov_reg_mem (code, dreg, tls_offset, 4);			
#endif
	return code;
}
2763
/*
 * REAL_PRINT_REG:
 *
 *   Debugging helper macro: emits code which prints TEXT followed by the
 * register number and the runtime value of REG via printf, preserving the
 * caller-clobbered registers EAX/EDX/ECX around the call. Three arguments
 * are pushed for printf (value, register number, format string) and the
 * matching 12 bytes are popped afterwards.
 *   NOTE(review): if REG is ESP, the pushed value includes the three saves
 * above it — presumably only used with general registers; verify callers.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
2778
/* benchmark and set based on cpu */
/* Byte boundary to which loop-header basic blocks are padded (see MONO_OPT_LOOP) */
#define LOOP_ALIGNMENT 8
/* A bb gets loop alignment only if it starts a loop body and is actually nested in one */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2782
2783 void
2784 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2785 {
2786         MonoInst *ins;
2787         MonoCallInst *call;
2788         guint offset;
2789         guint8 *code = cfg->native_code + cfg->code_len;
2790         MonoInst *last_ins = NULL;
2791         guint last_offset = 0;
2792         int max_len, cpos;
2793
2794         if (cfg->opt & MONO_OPT_PEEPHOLE)
2795                 peephole_pass (cfg, bb);
2796
2797         if (cfg->opt & MONO_OPT_LOOP) {
2798                 int pad, align = LOOP_ALIGNMENT;
2799                 /* set alignment depending on cpu */
2800                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2801                         pad = align - pad;
2802                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2803                         x86_padding (code, pad);
2804                         cfg->code_len += pad;
2805                         bb->native_offset = cfg->code_len;
2806                 }
2807         }
2808
2809         if (cfg->verbose_level > 2)
2810                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2811
2812         cpos = bb->max_offset;
2813
2814         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2815                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2816                 g_assert (!cfg->compile_aot);
2817                 cpos += 6;
2818
2819                 cov->data [bb->dfn].cil_code = bb->cil_code;
2820                 /* this is not thread save, but good enough */
2821                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2822         }
2823
2824         offset = code - cfg->native_code;
2825
2826         ins = bb->code;
2827         while (ins) {
2828                 offset = code - cfg->native_code;
2829
2830                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2831
2832                 if (offset > (cfg->code_size - max_len - 16)) {
2833                         cfg->code_size *= 2;
2834                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2835                         code = cfg->native_code + offset;
2836                         mono_jit_stats.code_reallocs++;
2837                 }
2838
2839                 mono_debug_record_line_number (cfg, ins, offset);
2840
2841                 switch (ins->opcode) {
2842                 case OP_BIGMUL:
2843                         x86_mul_reg (code, ins->sreg2, TRUE);
2844                         break;
2845                 case OP_BIGMUL_UN:
2846                         x86_mul_reg (code, ins->sreg2, FALSE);
2847                         break;
2848                 case OP_X86_SETEQ_MEMBASE:
2849                 case OP_X86_SETNE_MEMBASE:
2850                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2851                                          ins->inst_basereg, ins->inst_offset, TRUE);
2852                         break;
2853                 case OP_STOREI1_MEMBASE_IMM:
2854                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2855                         break;
2856                 case OP_STOREI2_MEMBASE_IMM:
2857                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2858                         break;
2859                 case OP_STORE_MEMBASE_IMM:
2860                 case OP_STOREI4_MEMBASE_IMM:
2861                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2862                         break;
2863                 case OP_STOREI1_MEMBASE_REG:
2864                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2865                         break;
2866                 case OP_STOREI2_MEMBASE_REG:
2867                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2868                         break;
2869                 case OP_STORE_MEMBASE_REG:
2870                 case OP_STOREI4_MEMBASE_REG:
2871                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2872                         break;
2873                 case CEE_LDIND_I:
2874                 case CEE_LDIND_I4:
2875                 case CEE_LDIND_U4:
2876                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2877                         break;
2878                 case OP_LOADU4_MEM:
2879                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2880                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2881                         break;
2882                 case OP_LOAD_MEMBASE:
2883                 case OP_LOADI4_MEMBASE:
2884                 case OP_LOADU4_MEMBASE:
2885                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2886                         break;
2887                 case OP_LOADU1_MEMBASE:
2888                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2889                         break;
2890                 case OP_LOADI1_MEMBASE:
2891                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2892                         break;
2893                 case OP_LOADU2_MEMBASE:
2894                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2895                         break;
2896                 case OP_LOADI2_MEMBASE:
2897                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2898                         break;
2899                 case CEE_CONV_I1:
2900                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2901                         break;
2902                 case CEE_CONV_I2:
2903                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2904                         break;
2905                 case CEE_CONV_U1:
2906                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2907                         break;
2908                 case CEE_CONV_U2:
2909                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2910                         break;
2911                 case OP_COMPARE:
2912                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2913                         break;
2914                 case OP_COMPARE_IMM:
2915                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2916                         break;
2917                 case OP_X86_COMPARE_MEMBASE_REG:
2918                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2919                         break;
2920                 case OP_X86_COMPARE_MEMBASE_IMM:
2921                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2922                         break;
2923                 case OP_X86_COMPARE_MEMBASE8_IMM:
2924                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2925                         break;
2926                 case OP_X86_COMPARE_REG_MEMBASE:
2927                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2928                         break;
2929                 case OP_X86_COMPARE_MEM_IMM:
2930                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2931                         break;
2932                 case OP_X86_TEST_NULL:
2933                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2934                         break;
2935                 case OP_X86_ADD_MEMBASE_IMM:
2936                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2937                         break;
2938                 case OP_X86_ADD_MEMBASE:
2939                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2940                         break;
2941                 case OP_X86_SUB_MEMBASE_IMM:
2942                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2943                         break;
2944                 case OP_X86_SUB_MEMBASE:
2945                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2946                         break;
2947                 case OP_X86_INC_MEMBASE:
2948                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2949                         break;
2950                 case OP_X86_INC_REG:
2951                         x86_inc_reg (code, ins->dreg);
2952                         break;
2953                 case OP_X86_DEC_MEMBASE:
2954                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2955                         break;
2956                 case OP_X86_DEC_REG:
2957                         x86_dec_reg (code, ins->dreg);
2958                         break;
2959                 case OP_X86_MUL_MEMBASE:
2960                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2961                         break;
2962                 case CEE_BREAK:
2963                         x86_breakpoint (code);
2964                         break;
2965                 case OP_ADDCC:
2966                 case CEE_ADD:
2967                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2968                         break;
2969                 case OP_ADC:
2970                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2971                         break;
2972                 case OP_ADDCC_IMM:
2973                 case OP_ADD_IMM:
2974                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2975                         break;
2976                 case OP_ADC_IMM:
2977                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2978                         break;
2979                 case OP_SUBCC:
2980                 case CEE_SUB:
2981                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2982                         break;
2983                 case OP_SBB:
2984                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2985                         break;
2986                 case OP_SUBCC_IMM:
2987                 case OP_SUB_IMM:
2988                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2989                         break;
2990                 case OP_SBB_IMM:
2991                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2992                         break;
2993                 case CEE_AND:
2994                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2995                         break;
2996                 case OP_AND_IMM:
2997                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2998                         break;
2999                 case CEE_DIV:
3000                         x86_cdq (code);
3001                         x86_div_reg (code, ins->sreg2, TRUE);
3002                         break;
3003                 case CEE_DIV_UN:
3004                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
3005                         x86_div_reg (code, ins->sreg2, FALSE);
3006                         break;
3007                 case OP_DIV_IMM:
3008                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3009                         x86_cdq (code);
3010                         x86_div_reg (code, ins->sreg2, TRUE);
3011                         break;
3012                 case CEE_REM:
3013                         x86_cdq (code);
3014                         x86_div_reg (code, ins->sreg2, TRUE);
3015                         break;
3016                 case CEE_REM_UN:
3017                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
3018                         x86_div_reg (code, ins->sreg2, FALSE);
3019                         break;
3020                 case OP_REM_IMM:
3021                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
3022                         x86_cdq (code);
3023                         x86_div_reg (code, ins->sreg2, TRUE);
3024                         break;
3025                 case CEE_OR:
3026                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
3027                         break;
3028                 case OP_OR_IMM:
3029                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
3030                         break;
3031                 case CEE_XOR:
3032                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
3033                         break;
3034                 case OP_XOR_IMM:
3035                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
3036                         break;
3037                 case CEE_SHL:
3038                         g_assert (ins->sreg2 == X86_ECX);
3039                         x86_shift_reg (code, X86_SHL, ins->dreg);
3040                         break;
3041                 case CEE_SHR:
3042                         g_assert (ins->sreg2 == X86_ECX);
3043                         x86_shift_reg (code, X86_SAR, ins->dreg);
3044                         break;
3045                 case OP_SHR_IMM:
3046                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
3047                         break;
3048                 case OP_SHR_UN_IMM:
3049                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
3050                         break;
3051                 case CEE_SHR_UN:
3052                         g_assert (ins->sreg2 == X86_ECX);
3053                         x86_shift_reg (code, X86_SHR, ins->dreg);
3054                         break;
3055                 case OP_SHL_IMM:
3056                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
3057                         break;
3058                 case OP_LSHL: {
3059                         guint8 *jump_to_end;
3060
3061                         /* handle shifts below 32 bits */
3062                         x86_shld_reg (code, ins->unused, ins->sreg1);
3063                         x86_shift_reg (code, X86_SHL, ins->sreg1);
3064
3065                         x86_test_reg_imm (code, X86_ECX, 32);
3066                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3067
3068                         /* handle shift over 32 bit */
3069                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3070                         x86_clear_reg (code, ins->sreg1);
3071                         
3072                         x86_patch (jump_to_end, code);
3073                         }
3074                         break;
3075                 case OP_LSHR: {
3076                         guint8 *jump_to_end;
3077
3078                         /* handle shifts below 32 bits */
3079                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3080                         x86_shift_reg (code, X86_SAR, ins->unused);
3081
3082                         x86_test_reg_imm (code, X86_ECX, 32);
3083                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3084
3085                         /* handle shifts over 31 bits */
3086                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3087                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
3088                         
3089                         x86_patch (jump_to_end, code);
3090                         }
3091                         break;
3092                 case OP_LSHR_UN: {
3093                         guint8 *jump_to_end;
3094
3095                         /* handle shifts below 32 bits */
3096                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3097                         x86_shift_reg (code, X86_SHR, ins->unused);
3098
3099                         x86_test_reg_imm (code, X86_ECX, 32);
3100                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3101
3102                         /* handle shifts over 31 bits */
3103                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3104                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
3105                         
3106                         x86_patch (jump_to_end, code);
3107                         }
3108                         break;
3109                 case OP_LSHL_IMM:
3110                         if (ins->inst_imm >= 32) {
3111                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3112                                 x86_clear_reg (code, ins->sreg1);
3113                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
3114                         } else {
3115                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
3116                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
3117                         }
3118                         break;
3119                 case OP_LSHR_IMM:
3120                         if (ins->inst_imm >= 32) {
3121                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
3122                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
3123                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
3124                         } else {
3125                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3126                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
3127                         }
3128                         break;
3129                 case OP_LSHR_UN_IMM:
3130                         if (ins->inst_imm >= 32) {
3131                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3132                                 x86_clear_reg (code, ins->unused);
3133                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
3134                         } else {
3135                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3136                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
3137                         }
3138                         break;
3139                 case CEE_NOT:
3140                         x86_not_reg (code, ins->sreg1);
3141                         break;
3142                 case CEE_NEG:
3143                         x86_neg_reg (code, ins->sreg1);
3144                         break;
3145                 case OP_SEXT_I1:
3146                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3147                         break;
3148                 case OP_SEXT_I2:
3149                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3150                         break;
3151                 case CEE_MUL:
3152                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3153                         break;
3154                 case OP_MUL_IMM:
3155                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
3156                         break;
3157                 case CEE_MUL_OVF:
3158                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3159                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3160                         break;
3161                 case CEE_MUL_OVF_UN: {
3162                         /* the mul operation and the exception check should most likely be split */
3163                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3164                         /*g_assert (ins->sreg2 == X86_EAX);
3165                         g_assert (ins->dreg == X86_EAX);*/
3166                         if (ins->sreg2 == X86_EAX) {
3167                                 non_eax_reg = ins->sreg1;
3168                         } else if (ins->sreg1 == X86_EAX) {
3169                                 non_eax_reg = ins->sreg2;
3170                         } else {
3171                                 /* no need to save since we're going to store to it anyway */
3172                                 if (ins->dreg != X86_EAX) {
3173                                         saved_eax = TRUE;
3174                                         x86_push_reg (code, X86_EAX);
3175                                 }
3176                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
3177                                 non_eax_reg = ins->sreg2;
3178                         }
3179                         if (ins->dreg == X86_EDX) {
3180                                 if (!saved_eax) {
3181                                         saved_eax = TRUE;
3182                                         x86_push_reg (code, X86_EAX);
3183                                 }
3184                         } else if (ins->dreg != X86_EAX) {
3185                                 saved_edx = TRUE;
3186                                 x86_push_reg (code, X86_EDX);
3187                         }
3188                         x86_mul_reg (code, non_eax_reg, FALSE);
3189                         /* save before the check since pop and mov don't change the flags */
3190                         if (ins->dreg != X86_EAX)
3191                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3192                         if (saved_edx)
3193                                 x86_pop_reg (code, X86_EDX);
3194                         if (saved_eax)
3195                                 x86_pop_reg (code, X86_EAX);
3196                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3197                         break;
3198                 }
3199                 case OP_ICONST:
3200                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
3201                         break;
3202                 case OP_AOTCONST:
3203                         g_assert_not_reached ();
3204                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
3205                         x86_mov_reg_imm (code, ins->dreg, 0);
3206                         break;
3207                 case OP_LOAD_GOTADDR:
3208                         x86_call_imm (code, 0);
3209                         /* 
3210                          * The patch needs to point to the pop, since the GOT offset needs 
3211                          * to be added to that address.
3212                          */
3213                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3214                         x86_pop_reg (code, ins->dreg);
3215                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
3216                         break;
3217                 case OP_GOT_ENTRY:
3218                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3219                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
3220                         break;
3221                 case OP_X86_PUSH_GOT_ENTRY:
3222                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3223                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
3224                         break;
3225                 case CEE_CONV_I4:
3226                 case OP_MOVE:
3227                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3228                         break;
3229                 case CEE_CONV_U4:
3230                         g_assert_not_reached ();
3231                 case CEE_JMP: {
3232                         /*
3233                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3234                          * Keep in sync with the code in emit_epilog.
3235                          */
3236                         int pos = 0;
3237
3238                         /* FIXME: no tracing support... */
3239                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3240                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3241                         /* reset offset to make max_len work */
3242                         offset = code - cfg->native_code;
3243
3244                         g_assert (!cfg->method->save_lmf);
3245
3246                         if (cfg->used_int_regs & (1 << X86_EBX))
3247                                 pos -= 4;
3248                         if (cfg->used_int_regs & (1 << X86_EDI))
3249                                 pos -= 4;
3250                         if (cfg->used_int_regs & (1 << X86_ESI))
3251                                 pos -= 4;
3252                         if (pos)
3253                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3254         
3255                         if (cfg->used_int_regs & (1 << X86_ESI))
3256                                 x86_pop_reg (code, X86_ESI);
3257                         if (cfg->used_int_regs & (1 << X86_EDI))
3258                                 x86_pop_reg (code, X86_EDI);
3259                         if (cfg->used_int_regs & (1 << X86_EBX))
3260                                 x86_pop_reg (code, X86_EBX);
3261         
3262                         /* restore ESP/EBP */
3263                         x86_leave (code);
3264                         offset = code - cfg->native_code;
3265                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3266                         x86_jump32 (code, 0);
3267                         break;
3268                 }
3269                 case OP_CHECK_THIS:
3270                         /* ensure ins->sreg1 is not NULL
3271                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
3272                          * cmp DWORD PTR [eax], 0
3273                          */
3274                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
3275                         break;
3276                 case OP_ARGLIST: {
3277                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
3278                         x86_push_reg (code, hreg);
3279                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
3280                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
3281                         x86_pop_reg (code, hreg);
3282                         break;
3283                 }
3284                 case OP_FCALL:
3285                 case OP_LCALL:
3286                 case OP_VCALL:
3287                 case OP_VOIDCALL:
3288                 case CEE_CALL:
3289                         call = (MonoCallInst*)ins;
3290                         if (ins->flags & MONO_INST_HAS_METHOD)
3291                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
3292                         else
3293                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
3294                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3295                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
3296                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
3297                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
3298                                  * smart enough to do that optimization yet
3299                                  *
3300                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
3301                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
3302                                  * speedup (most likely from locality benefits). People with other processors should
3303                                  * check on theirs to see what happens.
3304                                  */
3305                                 if (call->stack_usage == 4) {
3306                                         /* we want to use registers that won't get used soon, so use
3307                                          * ecx, as eax will get allocated first. edx is used by long calls,
3308                                          * so we can't use that.
3309                                          */
3310                                         
3311                                         x86_pop_reg (code, X86_ECX);
3312                                 } else {
3313                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3314                                 }
3315                         }
3316                         code = emit_move_return_value (cfg, ins, code);
3317                         break;
3318                 case OP_FCALL_REG:
3319                 case OP_LCALL_REG:
3320                 case OP_VCALL_REG:
3321                 case OP_VOIDCALL_REG:
3322                 case OP_CALL_REG:
3323                         call = (MonoCallInst*)ins;
3324                         x86_call_reg (code, ins->sreg1);
3325                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3326                                 if (call->stack_usage == 4)
3327                                         x86_pop_reg (code, X86_ECX);
3328                                 else
3329                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3330                         }
3331                         code = emit_move_return_value (cfg, ins, code);
3332                         break;
3333                 case OP_FCALL_MEMBASE:
3334                 case OP_LCALL_MEMBASE:
3335                 case OP_VCALL_MEMBASE:
3336                 case OP_VOIDCALL_MEMBASE:
3337                 case OP_CALL_MEMBASE:
3338                         call = (MonoCallInst*)ins;
3339                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
3340                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3341                                 if (call->stack_usage == 4)
3342                                         x86_pop_reg (code, X86_ECX);
3343                                 else
3344                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3345                         }
3346                         code = emit_move_return_value (cfg, ins, code);
3347                         break;
3348                 case OP_OUTARG:
3349                 case OP_X86_PUSH:
3350                         x86_push_reg (code, ins->sreg1);
3351                         break;
3352                 case OP_X86_PUSH_IMM:
3353                         x86_push_imm (code, ins->inst_imm);
3354                         break;
3355                 case OP_X86_PUSH_MEMBASE:
3356                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
3357                         break;
3358                 case OP_X86_PUSH_OBJ: 
3359                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
3360                         x86_push_reg (code, X86_EDI);
3361                         x86_push_reg (code, X86_ESI);
3362                         x86_push_reg (code, X86_ECX);
3363                         if (ins->inst_offset)
3364                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
3365                         else
3366                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
3367                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
3368                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
3369                         x86_cld (code);
3370                         x86_prefix (code, X86_REP_PREFIX);
3371                         x86_movsd (code);
3372                         x86_pop_reg (code, X86_ECX);
3373                         x86_pop_reg (code, X86_ESI);
3374                         x86_pop_reg (code, X86_EDI);
3375                         break;
3376                 case OP_X86_LEA:
3377                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
3378                         break;
3379                 case OP_X86_LEA_MEMBASE:
3380                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
3381                         break;
3382                 case OP_X86_XCHG:
3383                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
3384                         break;
3385                 case OP_LOCALLOC:
3386                         /* keep alignment */
3387                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
3388                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
3389                         code = mono_emit_stack_alloc (code, ins);
3390                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
3391                         break;
3392                 case CEE_RET:
3393                         x86_ret (code);
3394                         break;
3395                 case CEE_THROW: {
3396                         x86_push_reg (code, ins->sreg1);
3397                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3398                                                           (gpointer)"mono_arch_throw_exception");
3399                         break;
3400                 }
3401                 case OP_RETHROW: {
3402                         x86_push_reg (code, ins->sreg1);
3403                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3404                                                           (gpointer)"mono_arch_rethrow_exception");
3405                         break;
3406                 }
3407                 case OP_CALL_HANDLER: 
3408                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3409                         x86_call_imm (code, 0);
3410                         break;
3411                 case OP_LABEL:
3412                         ins->inst_c0 = code - cfg->native_code;
3413                         break;
3414                 case CEE_BR:
3415                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
3416                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
3417                         //break;
3418                         if (ins->flags & MONO_INST_BRLABEL) {
3419                                 if (ins->inst_i0->inst_c0) {
3420                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
3421                                 } else {
3422                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
3423                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3424                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
3425                                                 x86_jump8 (code, 0);
3426                                         else 
3427                                                 x86_jump32 (code, 0);
3428                                 }
3429                         } else {
3430                                 if (ins->inst_target_bb->native_offset) {
3431                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
3432                                 } else {
3433                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3434                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
3435                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
3436                                                 x86_jump8 (code, 0);
3437                                         else 
3438                                                 x86_jump32 (code, 0);
3439                                 } 
3440                         }
3441                         break;
3442                 case OP_BR_REG:
3443                         x86_jump_reg (code, ins->sreg1);
3444                         break;
3445                 case OP_CEQ:
3446                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3447                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3448                         break;
3449                 case OP_CLT:
3450                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
3451                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3452                         break;
3453                 case OP_CLT_UN:
3454                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3455                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3456                         break;
3457                 case OP_CGT:
3458                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
3459                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3460                         break;
3461                 case OP_CGT_UN:
3462                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3463                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3464                         break;
3465                 case OP_CNE:
3466                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
3467                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3468                         break;
3469                 case OP_COND_EXC_EQ:
3470                 case OP_COND_EXC_NE_UN:
3471                 case OP_COND_EXC_LT:
3472                 case OP_COND_EXC_LT_UN:
3473                 case OP_COND_EXC_GT:
3474                 case OP_COND_EXC_GT_UN:
3475                 case OP_COND_EXC_GE:
3476                 case OP_COND_EXC_GE_UN:
3477                 case OP_COND_EXC_LE:
3478                 case OP_COND_EXC_LE_UN:
3479                 case OP_COND_EXC_OV:
3480                 case OP_COND_EXC_NO:
3481                 case OP_COND_EXC_C:
3482                 case OP_COND_EXC_NC:
3483                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
3484                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
3485                         break;
3486                 case CEE_BEQ:
3487                 case CEE_BNE_UN:
3488                 case CEE_BLT:
3489                 case CEE_BLT_UN:
3490                 case CEE_BGT:
3491                 case CEE_BGT_UN:
3492                 case CEE_BGE:
3493                 case CEE_BGE_UN:
3494                 case CEE_BLE:
3495                 case CEE_BLE_UN:
3496                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
3497                         break;
3498
3499                 /* floating point opcodes */
3500                 case OP_R8CONST: {
3501                         double d = *(double *)ins->inst_p0;
3502
3503                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
3504                                 x86_fldz (code);
3505                         } else if (d == 1.0) {
3506                                 x86_fld1 (code);
3507                         } else {
3508                                 if (cfg->compile_aot) {
3509                                         guint32 *val = (guint32*)&d;
3510                                         x86_push_imm (code, val [1]);
3511                                         x86_push_imm (code, val [0]);
3512                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
3513                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3514                                 }
3515                                 else {
3516                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
3517                                         x86_fld (code, NULL, TRUE);
3518                                 }
3519                         }
3520                         break;
3521                 }
3522                 case OP_R4CONST: {
3523                         float f = *(float *)ins->inst_p0;
3524
3525                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
3526                                 x86_fldz (code);
3527                         } else if (f == 1.0) {
3528                                 x86_fld1 (code);
3529                         } else {
3530                                 if (cfg->compile_aot) {
3531                                         guint32 val = *(guint32*)&f;
3532                                         x86_push_imm (code, val);
3533                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
3534                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3535                                 }
3536                                 else {
3537                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
3538                                         x86_fld (code, NULL, FALSE);
3539                                 }
3540                         }
3541                         break;
3542                 }
3543                 case OP_STORER8_MEMBASE_REG:
3544                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3545                         break;
3546                 case OP_LOADR8_SPILL_MEMBASE:
3547                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3548                         x86_fxch (code, 1);
3549                         break;
3550                 case OP_LOADR8_MEMBASE:
3551                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3552                         break;
3553                 case OP_STORER4_MEMBASE_REG:
3554                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3555                         break;
3556                 case OP_LOADR4_MEMBASE:
3557                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3558                         break;
3559                 case CEE_CONV_R4: /* FIXME: change precision */
3560                 case CEE_CONV_R8:
3561                         x86_push_reg (code, ins->sreg1);
3562                         x86_fild_membase (code, X86_ESP, 0, FALSE);
3563                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3564                         break;
3565                 case OP_X86_FP_LOAD_I8:
3566                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3567                         break;
3568                 case OP_X86_FP_LOAD_I4:
3569                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3570                         break;
3571                 case OP_FCONV_TO_I1:
3572                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3573                         break;
3574                 case OP_FCONV_TO_U1:
3575                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3576                         break;
3577                 case OP_FCONV_TO_I2:
3578                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3579                         break;
3580                 case OP_FCONV_TO_U2:
3581                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3582                         break;
3583                 case OP_FCONV_TO_I4:
3584                 case OP_FCONV_TO_I:
3585                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3586                         break;
3587                 case OP_FCONV_TO_I8:
3588                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3589                         x86_fnstcw_membase(code, X86_ESP, 0);
3590                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3591                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3592                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3593                         x86_fldcw_membase (code, X86_ESP, 2);
3594                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3595                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3596                         x86_pop_reg (code, ins->dreg);
3597                         x86_pop_reg (code, ins->unused);
3598                         x86_fldcw_membase (code, X86_ESP, 0);
3599                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3600                         break;
3601                 case OP_LCONV_TO_R_UN: { 
3602                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
3603                         guint8 *br;
3604
3605                         /* load 64bit integer to FP stack */
3606                         x86_push_imm (code, 0);
3607                         x86_push_reg (code, ins->sreg2);
3608                         x86_push_reg (code, ins->sreg1);
3609                         x86_fild_membase (code, X86_ESP, 0, TRUE);
3610                         /* store as 80bit FP value */
3611                         x86_fst80_membase (code, X86_ESP, 0);
3612                         
3613                         /* test if lreg is negative */
3614                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3615                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3616         
3617                         /* add correction constant mn */
3618                         x86_fld80_mem (code, mn);
3619                         x86_fld80_membase (code, X86_ESP, 0);
3620                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3621                         x86_fst80_membase (code, X86_ESP, 0);
3622
3623                         x86_patch (br, code);
3624
3625                         x86_fld80_membase (code, X86_ESP, 0);
3626                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
3627
3628                         break;
3629                 }
3630                 case OP_LCONV_TO_OVF_I: {
3631                         guint8 *br [3], *label [1];
3632
3633                         /* 
3634                          * Valid ints: 0xffffffff:80000000 to 00000000:0x7fffffff
3635                          */
3636                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3637
3638                         /* If the low word top bit is set, see if we are negative */
3639                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3640                         /* We are not negative (no top bit set); check for our top word to be zero */
3641                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3642                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3643                         label [0] = code;
3644
3645                         /* throw exception */
3646                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3647                         x86_jump32 (code, 0);
3648         
3649                         x86_patch (br [0], code);
3650                         /* our top bit is set, check that top word is 0xffffffff */
3651                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3652                 
3653                         x86_patch (br [1], code);
3654                         /* nope, emit exception */
3655                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3656                         x86_patch (br [2], label [0]);
3657
3658                         if (ins->dreg != ins->sreg1)
3659                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3660                         break;
3661                 }
3662                 case OP_FADD:
3663                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3664                         break;
3665                 case OP_FSUB:
3666                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3667                         break;          
3668                 case OP_FMUL:
3669                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3670                         break;          
3671                 case OP_FDIV:
3672                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3673                         break;          
3674                 case OP_FNEG:
3675                         x86_fchs (code);
3676                         break;          
3677                 case OP_SIN:
3678                         x86_fsin (code);
3679                         x86_fldz (code);
3680                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3681                         break;          
3682                 case OP_COS:
3683                         x86_fcos (code);
3684                         x86_fldz (code);
3685                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3686                         break;          
3687                 case OP_ABS:
3688                         x86_fabs (code);
3689                         break;          
3690                 case OP_TAN: {
3691                         /* 
3692                          * it really doesn't make sense to inline all this code,
3693                          * it's here just to show that things may not be as simple 
3694                          * as they appear.
3695                          */
3696                         guchar *check_pos, *end_tan, *pop_jump;
3697                         x86_push_reg (code, X86_EAX);
3698                         x86_fptan (code);
3699                         x86_fnstsw (code);
3700                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3701                         check_pos = code;
3702                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3703                         x86_fstp (code, 0); /* pop the 1.0 */
3704                         end_tan = code;
3705                         x86_jump8 (code, 0);
3706                         x86_fldpi (code);
3707                         x86_fp_op (code, X86_FADD, 0);
3708                         x86_fxch (code, 1);
3709                         x86_fprem1 (code);
3710                         x86_fstsw (code);
3711                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3712                         pop_jump = code;
3713                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3714                         x86_fstp (code, 1);
3715                         x86_fptan (code);
3716                         x86_patch (pop_jump, code);
3717                         x86_fstp (code, 0); /* pop the 1.0 */
3718                         x86_patch (check_pos, code);
3719                         x86_patch (end_tan, code);
3720                         x86_fldz (code);
3721                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3722                         x86_pop_reg (code, X86_EAX);
3723                         break;
3724                 }
3725                 case OP_ATAN:
3726                         x86_fld1 (code);
3727                         x86_fpatan (code);
3728                         x86_fldz (code);
3729                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3730                         break;          
3731                 case OP_SQRT:
3732                         x86_fsqrt (code);
3733                         break;          
3734                 case OP_X86_FPOP:
3735                         x86_fstp (code, 0);
3736                         break;          
3737                 case OP_FREM: {
3738                         guint8 *l1, *l2;
3739
3740                         x86_push_reg (code, X86_EAX);
3741                         /* we need to exchange ST(0) with ST(1) */
3742                         x86_fxch (code, 1);
3743
3744                         /* this requires a loop, because fprem sometimes 
3745                          * returns a partial remainder */
3746                         l1 = code;
3747                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3748                         /* x86_fprem1 (code); */
3749                         x86_fprem (code);
3750                         x86_fnstsw (code);
3751                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3752                         l2 = code + 2;
3753                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3754
3755                         /* pop result */
3756                         x86_fstp (code, 1);
3757
3758                         x86_pop_reg (code, X86_EAX);
3759                         break;
3760                 }
3761                 case OP_FCOMPARE:
3762                         if (cfg->opt & MONO_OPT_FCMOV) {
3763                                 x86_fcomip (code, 1);
3764                                 x86_fstp (code, 0);
3765                                 break;
3766                         }
3767                         /* this overwrites EAX */
3768                         EMIT_FPCOMPARE(code);
3769                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3770                         break;
3771                 case OP_FCEQ:
3772                         if (cfg->opt & MONO_OPT_FCMOV) {
3773                                 /* zeroing the register at the start results in 
3774                                  * shorter and faster code (we can also remove the widening op)
3775                                  */
3776                                 guchar *unordered_check;
3777                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3778                                 x86_fcomip (code, 1);
3779                                 x86_fstp (code, 0);
3780                                 unordered_check = code;
3781                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3782                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3783                                 x86_patch (unordered_check, code);
3784                                 break;
3785                         }
3786                         if (ins->dreg != X86_EAX) 
3787                                 x86_push_reg (code, X86_EAX);
3788
3789                         EMIT_FPCOMPARE(code);
3790                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3791                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3792                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3793                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3794
3795                         if (ins->dreg != X86_EAX) 
3796                                 x86_pop_reg (code, X86_EAX);
3797                         break;
3798                 case OP_FCLT:
3799                 case OP_FCLT_UN:
3800                         if (cfg->opt & MONO_OPT_FCMOV) {
3801                                 /* zeroing the register at the start results in 
3802                                  * shorter and faster code (we can also remove the widening op)
3803                                  */
3804                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3805                                 x86_fcomip (code, 1);
3806                                 x86_fstp (code, 0);
3807                                 if (ins->opcode == OP_FCLT_UN) {
3808                                         guchar *unordered_check = code;
3809                                         guchar *jump_to_end;
3810                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3811                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3812                                         jump_to_end = code;
3813                                         x86_jump8 (code, 0);
3814                                         x86_patch (unordered_check, code);
3815                                         x86_inc_reg (code, ins->dreg);
3816                                         x86_patch (jump_to_end, code);
3817                                 } else {
3818                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3819                                 }
3820                                 break;
3821                         }
3822                         if (ins->dreg != X86_EAX) 
3823                                 x86_push_reg (code, X86_EAX);
3824
3825                         EMIT_FPCOMPARE(code);
3826                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3827                         if (ins->opcode == OP_FCLT_UN) {
3828                                 guchar *is_not_zero_check, *end_jump;
3829                                 is_not_zero_check = code;
3830                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3831                                 end_jump = code;
3832                                 x86_jump8 (code, 0);
3833                                 x86_patch (is_not_zero_check, code);
3834                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3835
3836                                 x86_patch (end_jump, code);
3837                         }
3838                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3839                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3840
3841                         if (ins->dreg != X86_EAX) 
3842                                 x86_pop_reg (code, X86_EAX);
3843                         break;
3844                 case OP_FCGT:
3845                 case OP_FCGT_UN:
3846                         if (cfg->opt & MONO_OPT_FCMOV) {
3847                                 /* zeroing the register at the start results in 
3848                                  * shorter and faster code (we can also remove the widening op)
3849                                  */
3850                                 guchar *unordered_check;
3851                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3852                                 x86_fcomip (code, 1);
3853                                 x86_fstp (code, 0);
3854                                 if (ins->opcode == OP_FCGT) {
3855                                         unordered_check = code;
3856                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3857                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3858                                         x86_patch (unordered_check, code);
3859                                 } else {
3860                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3861                                 }
3862                                 break;
3863                         }
3864                         if (ins->dreg != X86_EAX) 
3865                                 x86_push_reg (code, X86_EAX);
3866
3867                         EMIT_FPCOMPARE(code);
3868                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3869                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3870                         if (ins->opcode == OP_FCGT_UN) {
3871                                 guchar *is_not_zero_check, *end_jump;
3872                                 is_not_zero_check = code;
3873                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3874                                 end_jump = code;
3875                                 x86_jump8 (code, 0);
3876                                 x86_patch (is_not_zero_check, code);
3877                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3878         
3879                                 x86_patch (end_jump, code);
3880                         }
3881                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3882                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3883
3884                         if (ins->dreg != X86_EAX) 
3885                                 x86_pop_reg (code, X86_EAX);
3886                         break;
3887                 case OP_FBEQ:
3888                         if (cfg->opt & MONO_OPT_FCMOV) {
3889                                 guchar *jump = code;
3890                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3891                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3892                                 x86_patch (jump, code);
3893                                 break;
3894                         }
3895                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3896                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3897                         break;
3898                 case OP_FBNE_UN:
3899                         /* Branch if C013 != 100 */
3900                         if (cfg->opt & MONO_OPT_FCMOV) {
3901                                 /* branch if !ZF or (PF|CF) */
3902                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3903                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3904                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3905                                 break;
3906                         }
3907                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3908                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3909                         break;
3910                 case OP_FBLT:
3911                         if (cfg->opt & MONO_OPT_FCMOV) {
3912                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3913                                 break;
3914                         }
3915                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3916                         break;
3917                 case OP_FBLT_UN:
3918                         if (cfg->opt & MONO_OPT_FCMOV) {
3919                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3920                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3921                                 break;
3922                         }
3923                         if (ins->opcode == OP_FBLT_UN) {
3924                                 guchar *is_not_zero_check, *end_jump;
3925                                 is_not_zero_check = code;
3926                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3927                                 end_jump = code;
3928                                 x86_jump8 (code, 0);
3929                                 x86_patch (is_not_zero_check, code);
3930                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3931
3932                                 x86_patch (end_jump, code);
3933                         }
3934                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3935                         break;
3936                 case OP_FBGT:
3937                 case OP_FBGT_UN:
3938                         if (cfg->opt & MONO_OPT_FCMOV) {
3939                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3940                                 break;
3941                         }
3942                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3943                         if (ins->opcode == OP_FBGT_UN) {
3944                                 guchar *is_not_zero_check, *end_jump;
3945                                 is_not_zero_check = code;
3946                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3947                                 end_jump = code;
3948                                 x86_jump8 (code, 0);
3949                                 x86_patch (is_not_zero_check, code);
3950                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3951
3952                                 x86_patch (end_jump, code);
3953                         }
3954                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3955                         break;
3956                 case OP_FBGE:
3957                         /* Branch if C013 == 100 or 001 */
3958                         if (cfg->opt & MONO_OPT_FCMOV) {
3959                                 guchar *br1;
3960
3961                                 /* skip branch if C1=1 */
3962                                 br1 = code;
3963                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3964                                 /* branch if (C0 | C3) = 1 */
3965                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3966                                 x86_patch (br1, code);
3967                                 break;
3968                         }
3969                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3970                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3971                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3972                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3973                         break;
3974                 case OP_FBGE_UN:
3975                         /* Branch if C013 == 000 */
3976                         if (cfg->opt & MONO_OPT_FCMOV) {
3977                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3978                                 break;
3979                         }
3980                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3981                         break;
3982                 case OP_FBLE:
3983                         /* Branch if C013=000 or 100 */
3984                         if (cfg->opt & MONO_OPT_FCMOV) {
3985                                 guchar *br1;
3986
3987                                 /* skip branch if C1=1 */
3988                                 br1 = code;
3989                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3990                                 /* branch if C0=0 */
3991                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3992                                 x86_patch (br1, code);
3993                                 break;
3994                         }
3995                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3996                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3997                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3998                         break;
3999                 case OP_FBLE_UN:
4000                         /* Branch if C013 != 001 */
4001                         if (cfg->opt & MONO_OPT_FCMOV) {
4002                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
4003                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
4004                                 break;
4005                         }
4006                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
4007                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
4008                         break;
4009                 case CEE_CKFINITE: {
4010                         x86_push_reg (code, X86_EAX);
4011                         x86_fxam (code);
4012                         x86_fnstsw (code);
4013                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
4014                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
4015                         x86_pop_reg (code, X86_EAX);
4016                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
4017                         break;
4018                 }
4019                 case OP_TLS_GET: {
4020                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
4021                         break;
4022                 }
4023                 case OP_ATOMIC_ADD_I4: {
4024                         int dreg = ins->dreg;
4025
4026                         if (dreg == ins->inst_basereg) {
4027                                 x86_push_reg (code, ins->sreg2);
4028                                 dreg = ins->sreg2;
4029                         } 
4030                         
4031                         if (dreg != ins->sreg2)
4032                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
4033
4034                         x86_prefix (code, X86_LOCK_PREFIX);
4035                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4036
4037                         if (dreg != ins->dreg) {
4038                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4039                                 x86_pop_reg (code, dreg);
4040                         }
4041
4042                         break;
4043                 }
4044                 case OP_ATOMIC_ADD_NEW_I4: {
4045                         int dreg = ins->dreg;
4046
4047                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
4048                         if (ins->sreg2 == dreg) {
4049                                 if (dreg == X86_EBX) {
4050                                         dreg = X86_EDI;
4051                                         if (ins->inst_basereg == X86_EDI)
4052                                                 dreg = X86_ESI;
4053                                 } else {
4054                                         dreg = X86_EBX;
4055                                         if (ins->inst_basereg == X86_EBX)
4056                                                 dreg = X86_EDI;
4057                                 }
4058                         } else if (ins->inst_basereg == dreg) {
4059                                 if (dreg == X86_EBX) {
4060                                         dreg = X86_EDI;
4061                                         if (ins->sreg2 == X86_EDI)
4062                                                 dreg = X86_ESI;
4063                                 } else {
4064                                         dreg = X86_EBX;
4065                                         if (ins->sreg2 == X86_EBX)
4066                                                 dreg = X86_EDI;
4067                                 }
4068                         }
4069
4070                         if (dreg != ins->dreg) {
4071                                 x86_push_reg (code, dreg);
4072                         }
4073
4074                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
4075                         x86_prefix (code, X86_LOCK_PREFIX);
4076                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4077                         /* dreg contains the old value, add with sreg2 value */
4078                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
4079                         
4080                         if (ins->dreg != dreg) {
4081                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4082                                 x86_pop_reg (code, dreg);
4083                         }
4084
4085                         break;
4086                 }
4087                 case OP_ATOMIC_EXCHANGE_I4: {
4088                         guchar *br[2];
4089                         int sreg2 = ins->sreg2;
4090                         int breg = ins->inst_basereg;
4091
4092                         /* cmpxchg uses eax as comperand, need to make sure we can use it
4093                          * hack to overcome limits in x86 reg allocator 
4094                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
4095                          */
4096                         if (ins->dreg != X86_EAX)
4097                                 x86_push_reg (code, X86_EAX);
4098                         
4099                         /* We need the EAX reg for the cmpxchg */
4100                         if (ins->sreg2 == X86_EAX) {
4101                                 x86_push_reg (code, X86_EDX);
4102                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
4103                                 sreg2 = X86_EDX;
4104                         }
4105
4106                         if (breg == X86_EAX) {
4107                                 x86_push_reg (code, X86_ESI);
4108                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
4109                                 breg = X86_ESI;
4110                         }
4111
4112                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
4113
4114                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
4115                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
4116                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
4117                         x86_patch (br [1], br [0]);
4118
4119                         if (breg != ins->inst_basereg)
4120                                 x86_pop_reg (code, X86_ESI);
4121
4122                         if (ins->dreg != X86_EAX) {
4123                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
4124                                 x86_pop_reg (code, X86_EAX);
4125                         }
4126
4127                         if (ins->sreg2 != sreg2)
4128                                 x86_pop_reg (code, X86_EDX);
4129
4130                         break;
4131                 }
4132                 default:
4133                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
4134                         g_assert_not_reached ();
4135                 }
4136
4137                 if ((code - cfg->native_code - offset) > max_len) {
4138                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4139                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4140                         g_assert_not_reached ();
4141                 }
4142                
4143                 cpos += max_len;
4144
4145                 last_ins = ins;
4146                 last_offset = offset;
4147                 
4148                 ins = ins->next;
4149         }
4150
4151         cfg->code_len = code - cfg->native_code;
4152 }
4153
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Hook for registering arch-specific low-level runtime calls.
 * The x86 backend has none to register, so this is intentionally empty.
 */
void
mono_arch_register_lowlevel_calls (void)
{
}
4158
4159 void
4160 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4161 {
4162         MonoJumpInfo *patch_info;
4163         gboolean compile_aot = !run_cctors;
4164
4165         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4166                 unsigned char *ip = patch_info->ip.i + code;
4167                 const unsigned char *target;
4168
4169                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4170
4171                 if (compile_aot) {
4172                         switch (patch_info->type) {
4173                         case MONO_PATCH_INFO_BB:
4174                         case MONO_PATCH_INFO_LABEL:
4175                                 break;
4176                         default:
4177                                 /* No need to patch these */
4178                                 continue;
4179                         }
4180                 }
4181
4182                 switch (patch_info->type) {
4183                 case MONO_PATCH_INFO_IP:
4184                         *((gconstpointer *)(ip)) = target;
4185                         break;
4186                 case MONO_PATCH_INFO_CLASS_INIT: {
4187                         guint8 *code = ip;
4188                         /* Might already been changed to a nop */
4189                         x86_call_code (code, 0);
4190                         x86_patch (ip, target);
4191                         break;
4192                 }
4193                 case MONO_PATCH_INFO_ABS:
4194                 case MONO_PATCH_INFO_METHOD:
4195                 case MONO_PATCH_INFO_METHOD_JUMP:
4196                 case MONO_PATCH_INFO_INTERNAL_METHOD:
4197                 case MONO_PATCH_INFO_BB:
4198                 case MONO_PATCH_INFO_LABEL:
4199                         x86_patch (ip, target);
4200                         break;
4201                 case MONO_PATCH_INFO_NONE:
4202                         break;
4203                 default: {
4204                         guint32 offset = mono_arch_get_patch_offset (ip);
4205                         *((gconstpointer *)(ip + offset)) = target;
4206                         break;
4207                 }
4208                 }
4209         }
4210 }
4211
/*
 * mono_arch_emit_prolog:
 *
 *   Emit the function prologue for CFG->method into a freshly allocated
 * native code buffer: set up the EBP frame, optionally attach the thread /
 * set up the LMF, save callee-used registers, allocate the stack frame,
 * and move register-allocated arguments from the stack into registers.
 * Returns the code cursor past the emitted prologue; also initializes
 * cfg->native_code, cfg->code_size and cfg->code_len.
 */
guint8 *
mono_arch_emit_prolog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoBasicBlock *bb;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int alloc_size, pos, max_offset, i;
	guint8 *code;

	/* Initial estimate: 4 bytes of native code per IL byte, minimum 256 */
	cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
	code = cfg->native_code = g_malloc (cfg->code_size);

	/* Standard frame setup: push ebp; mov ebp, esp */
	x86_push_reg (code, X86_EBP);
	x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);

	/* stack_offset is negative; alloc_size is the positive frame size */
	alloc_size = - cfg->stack_offset;
	pos = 0;

	if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
		/* Might need to attach the thread to the JIT */
		if (lmf_tls_offset != -1) {
			guint8 *buf;

			/* If the LMF TLS slot is already non-NULL the thread is attached */
			code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
			x86_test_reg_reg (code, X86_EAX, X86_EAX);
			buf = code;
			x86_branch8 (code, X86_CC_NE, 0, 0);
			/* Slow path: call mono_jit_thread_attach (cfg->domain) */
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			x86_patch (buf, code);
		}
		else {
			/* No TLS fast path available: always call the attach helper */
			g_assert (!cfg->compile_aot);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
		}
	}

	if (method->save_lmf) {
		/* Build a MonoLMF structure on the stack by pushing its fields
		 * in reverse order; the epilog reads them back at fixed EBP offsets. */
		pos += sizeof (MonoLMF);

		/* save the current IP */
		mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
		x86_push_imm_template (code);

		/* save all caller saved regs */
		x86_push_reg (code, X86_EBP);
		x86_push_reg (code, X86_ESI);
		x86_push_reg (code, X86_EDI);
		x86_push_reg (code, X86_EBX);

		/* save method info */
		x86_push_imm (code, method);

		/* get the address of lmf for the current thread */
		/* 
		 * This is performance critical so we try to use some tricks to make
		 * it fast.
		 */
		if (lmf_tls_offset != -1) {
			/* Load lmf quicky using the GS register */
			code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
		}
		else {
			if (cfg->compile_aot) {
				/* The GOT var does not exist yet */
				/* call/pop gets the current IP into EAX; the add
				 * immediate (patched via GOT_OFFSET) turns it into
				 * the GOT address, through which we call the helper. */
				x86_call_imm (code, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
				x86_pop_reg (code, X86_EAX);
				x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
				x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
			}
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
		}

		/* push lmf */
		x86_push_reg (code, X86_EAX); 
		/* push *lfm (previous_lmf) */
		x86_push_membase (code, X86_EAX, 0);
		/* *(lmf) = ESP */
		x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
	} else {
		/* No LMF: just save the callee-saved registers this method uses */

		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_push_reg (code, X86_EBX);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_push_reg (code, X86_EDI);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_push_reg (code, X86_ESI);
			pos += 4;
		}
	}

	/* The pushes above already consumed 'pos' bytes of the frame */
	alloc_size -= pos;

	if (alloc_size) {
		/* See mono_emit_stack_alloc */
#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
		/* Touch each page while growing the stack so guard pages fault in order */
		guint32 remaining_size = alloc_size;
		while (remaining_size >= 0x1000) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
			x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
			remaining_size -= 0x1000;
		}
		if (remaining_size)
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
#else
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
#endif
	}

	/* compute max_offset in order to use short forward jumps */
	max_offset = 0;
	if (cfg->opt & MONO_OPT_BRANCH) {
		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
			MonoInst *ins = bb->code;
			bb->max_offset = max_offset;

			if (cfg->prof_options & MONO_PROFILE_COVERAGE)
				max_offset += 6;
			/* max alignment for loops */
			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
				max_offset += LOOP_ALIGNMENT;

			while (ins) {
				if (ins->opcode == OP_LABEL)
					ins->inst_c1 = max_offset;
				
				/* Accumulate the per-opcode worst-case native length */
				max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
				ins = ins->next;
			}
		}
	}

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);

	/* load arguments allocated to register from the stack */
	sig = mono_method_signature (method);
	pos = 0;

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = cfg->varinfo [pos];
		if (inst->opcode == OP_REGVAR) {
			x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
			if (cfg->verbose_level > 2)
				g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
		}
		pos++;
	}

	cfg->code_len = code - cfg->native_code;

	return code;
}
4388
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the function epilogue at the end of cfg->native_code: restore the
 * saved LMF or callee-saved registers, load value-type return values into
 * registers/FP stack when the calling convention requires it, tear down the
 * EBP frame and emit the ret (popping callee-cleaned arguments for stdcall
 * and hidden-struct-return conventions). Grows the code buffer if needed.
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;
	
	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	/* Ensure the buffer can hold the epilog (keep 16 bytes of slack) */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with CEE_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;

		/* Find a spare register */
		/* A 64-bit return occupies EAX:EDX, so EDX is not free then */
		switch (sig->ret->type) {
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
			prev_lmf_reg = X86_EDI;
			cfg->used_int_regs |= (1 << X86_EDI);
			break;
		default:
			prev_lmf_reg = X86_EDX;
			break;
		}

		/* NOTE: the fixed EBP offsets below must match the MonoLMF layout
		 * pushed by the prolog — keep in sync with mono_arch_emit_prolog */
		/* reg = previous_lmf */
		x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);

		/* ecx = lmf */
		x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);

		/* *(lmf) = previous_lmf */
		x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* Compute how far below EBP the saved registers sit ... */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		/* ... point ESP at them ... */
		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		/* ... and pop them back in reverse push order */
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		/* The returned value type is split into two 4-byte "quads" */
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	if (CALLCONV_IS_STDCALL (sig)) {
		/* stdcall: callee pops its arguments; compute their total size */
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		/* Callee pops the hidden struct-return address argument */
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	g_free (cinfo);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
4526
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out-of-line code which throws the corlib exceptions recorded in
 * the MONO_PATCH_INFO_EXC patch infos of CFG. Throw sequences are shared per
 * exception class: subsequent throw sites for an already-emitted class just
 * push their IP offset and jump to the existing sequence. The native code
 * buffer is grown if the estimated space does not fit.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	/* Only the first 16 distinct exception classes get shared sequences */
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		/* AOT sequences are longer: they also load the GOT address (see below) */
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Redirect the branch at the throw site to the code emitted here */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse it: push this site's IP offset and jump to the shared code */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				/* The shared sequence already carries the call patch info */
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 got_reg = X86_EAX;
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				if (cfg->compile_aot) {
					size = 5 + 6;
					if (!cfg->got_var)
						size += 32;
					else if (cfg->got_var->opcode == OP_REGOFFSET)
						size += 6;
				}
				else
					size = 5 + 5;

				/*
				 * The IP offset pushed below is (end of sequence - throw_ip);
				 * if that fits in a signed byte the short push form can be used.
				 */
				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					/* Reserve a full push imm32; the 0xf0f0f0f0 is backpatched below */
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				if (cfg->compile_aot) {
					/*
					 * Since the patches are generated by the back end, there is
					 * no way to generate a got_var at this point.
					 */
					if (!cfg->got_var) {
						/* Materialize the GOT address via call/pop + patched add */
						x86_call_imm (code, 0);
						mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
						x86_pop_reg (code, X86_EAX);
						x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
					}
					else {
						if (cfg->got_var->opcode == OP_REGOFFSET)
							x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
						else
							got_reg = cfg->got_var->dreg;
					}
				}

				x86_push_imm (code, exc_class->type_token);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				if (cfg->compile_aot)
					x86_call_membase (code, got_reg, 0xf0f0f0f0);
				else
					x86_call_code (code, 0);
				/*
				 * Now the end of the sequence is known: backpatch the real IP
				 * offset into the earlier push and nop-fill any slack left by
				 * the short/long push form difference.
				 */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
4662
/*
 * mono_arch_flush_icache:
 *
 *   Flush the instruction cache for freshly generated code. A no-op on x86,
 * where the hardware keeps the instruction cache coherent with memory writes.
 */
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* not needed */
}
4668
/*
 * mono_arch_flush_register_windows:
 *
 *   Arch hook for flushing register windows (needed on SPARC-style
 * architectures); x86 has none, so this is a no-op.
 */
void
mono_arch_flush_register_windows (void)
{
}
4673
4674 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4675
/*
 * setup_stack:
 *
 *   Record the current thread's stack boundaries in TLS and install an
 * alternate signal stack, so stack-overflow SIGSEGVs can be handled.
 * Only compiled when MONO_ARCH_SIGSEGV_ON_ALTSTACK is defined.
 */
static void
setup_stack (MonoJitTlsData *tls)
{
	pthread_t self = pthread_self();
	pthread_attr_t attr;
	size_t stsize = 0;
	struct sigaltstack sa;
	guint8 *staddr = NULL;
	/* Address of a live stack local, used to sanity-check the bounds below */
	guint8 *current = (guint8*)&staddr;

	/* Valgrind provides its own stack handling; skip the altstack setup */
	if (mono_running_on_valgrind ())
		return;

	/* Determine stack boundaries */
#ifdef HAVE_PTHREAD_GETATTR_NP
	pthread_getattr_np( self, &attr );
#else
#ifdef HAVE_PTHREAD_ATTR_GET_NP
	pthread_attr_get_np( self, &attr );
#elif defined(sun)
	pthread_attr_init( &attr );
	pthread_attr_getstacksize( &attr, &stsize );
#else
#error "Not implemented"
#endif
#endif
#ifndef sun
	pthread_attr_getstack( &attr, (void**)&staddr, &stsize );
#endif

	/* NOTE(review): on sun, staddr is never assigned (only stsize is
	 * queried above), so this assert looks like it would always fire
	 * there — verify on that platform. */
	g_assert (staddr);

	g_assert ((current > staddr) && (current < staddr + stsize));

	tls->end_of_stack = staddr + stsize;

	/*
	 * threads created by nptl does not seem to have a guard page, and
	 * since the main thread is not created by us, we can't even set one.
	 * Increasing stsize fools the SIGSEGV signal handler into thinking this
	 * is a stack overflow exception.
	 */
	tls->stack_size = stsize + getpagesize ();

	/* Setup an alternate signal stack */
	tls->signal_stack = mmap (0, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	tls->signal_stack_size = SIGNAL_STACK_SIZE;

	/* NOTE(review): mmap reports failure with MAP_FAILED ((void*)-1), not
	 * NULL, so this assert cannot actually catch a failed mapping. */
	g_assert (tls->signal_stack);

	sa.ss_sp = tls->signal_stack;
	sa.ss_size = SIGNAL_STACK_SIZE;
	/* NOTE(review): POSIX specifies ss_flags = 0 when installing a stack;
	 * SS_ONSTACK here is a historically tolerated quirk — confirm on
	 * non-Linux targets. */
	sa.ss_flags = SS_ONSTACK;
	sigaltstack (&sa, NULL);
}
4731
4732 #endif
4733
4734 /*
4735  * Support for fast access to the thread-local lmf structure using the GS
4736  * segment register on NPTL + kernel 2.6.x.
4737  */
4738
4739 static gboolean tls_offset_inited = FALSE;
4740
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Per-thread JIT TLS initialization: cache the TLS offsets/keys used for
 * fast inline access to the appdomain, LMF and thread structures, and (when
 * MONO_ARCH_SIGSEGV_ON_ALTSTACK is defined) install this thread's alternate
 * signal stack.
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
		/* Fast TLS access can be disabled through the environment */
		if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
			/* 
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();
			thread_tls_offset = mono_thread_get_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
			if (thread_tls_offset >= 64)
				thread_tls_offset = -1;
#else
			/* Only this path latches tls_offset_inited: on Windows the
			 * lookup above is deliberately redone on every call */
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			thread_tls_offset = mono_thread_get_tls_offset ();
#endif
		}
	}

#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
	setup_stack (tls);
#endif
}
4775
/*
 * mono_arch_free_jit_tls_data:
 *
 *   Undo the per-thread setup done in mono_arch_setup_jit_tls_data:
 * disable the alternate signal stack and unmap its memory.
 */
void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
	struct sigaltstack sa;

	/* Disable the alternate stack before releasing its memory */
	sa.ss_sp = tls->signal_stack;
	sa.ss_size = SIGNAL_STACK_SIZE;
	sa.ss_flags = SS_DISABLE;
	sigaltstack  (&sa, NULL);

	if (tls->signal_stack)
		munmap (tls->signal_stack, SIGNAL_STACK_SIZE);
#endif
}
4791
4792 void
4793 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4794 {
4795
4796         /* add the this argument */
4797         if (this_reg != -1) {
4798                 MonoInst *this;
4799                 MONO_INST_NEW (cfg, this, OP_OUTARG);
4800                 this->type = this_type;
4801                 this->sreg1 = this_reg;
4802                 mono_bblock_add_inst (cfg->cbb, this);
4803         }
4804
4805         if (vt_reg != -1) {
4806                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
4807                 MonoInst *vtarg;
4808
4809                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4810                         /*
4811                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4812                          * the stack. Save the address here, so the call instruction can
4813                          * access it.
4814                          */
4815                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4816                         vtarg->inst_destbasereg = X86_ESP;
4817                         vtarg->inst_offset = inst->stack_usage;
4818                         vtarg->sreg1 = vt_reg;
4819                         mono_bblock_add_inst (cfg->cbb, vtarg);
4820                 }
4821                 else {
4822                         MonoInst *vtarg;
4823                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4824                         vtarg->type = STACK_MP;
4825                         vtarg->sreg1 = vt_reg;
4826                         mono_bblock_add_inst (cfg->cbb, vtarg);
4827                 }
4828
4829                 g_free (cinfo);
4830         }
4831 }
4832
4833
4834 MonoInst*
4835 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4836 {
4837         MonoInst *ins = NULL;
4838
4839         if (cmethod->klass == mono_defaults.math_class) {
4840                 if (strcmp (cmethod->name, "Sin") == 0) {
4841                         MONO_INST_NEW (cfg, ins, OP_SIN);
4842                         ins->inst_i0 = args [0];
4843                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4844                         MONO_INST_NEW (cfg, ins, OP_COS);
4845                         ins->inst_i0 = args [0];
4846                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4847                         MONO_INST_NEW (cfg, ins, OP_TAN);
4848                         ins->inst_i0 = args [0];
4849                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4850                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4851                         ins->inst_i0 = args [0];
4852                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4853                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4854                         ins->inst_i0 = args [0];
4855                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4856                         MONO_INST_NEW (cfg, ins, OP_ABS);
4857                         ins->inst_i0 = args [0];
4858                 }
4859 #if 0
4860                 /* OP_FREM is not IEEE compatible */
4861                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4862                         MONO_INST_NEW (cfg, ins, OP_FREM);
4863                         ins->inst_i0 = args [0];
4864                         ins->inst_i1 = args [1];
4865                 }
4866 #endif
4867         } else if(cmethod->klass->image == mono_defaults.corlib &&
4868                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4869                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4870
4871                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4872                         MonoInst *ins_iconst;
4873
4874                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4875                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4876                         ins_iconst->inst_c0 = 1;
4877
4878                         ins->inst_i0 = args [0];
4879                         ins->inst_i1 = ins_iconst;
4880                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4881                         MonoInst *ins_iconst;
4882
4883                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4884                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4885                         ins_iconst->inst_c0 = -1;
4886
4887                         ins->inst_i0 = args [0];
4888                         ins->inst_i1 = ins_iconst;
4889                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4890                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4891
4892                         ins->inst_i0 = args [0];
4893                         ins->inst_i1 = args [1];
4894                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4895                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
4896
4897                         ins->inst_i0 = args [0];
4898                         ins->inst_i1 = args [1];
4899                 }
4900         }
4901
4902         return ins;
4903 }
4904
4905
4906 gboolean
4907 mono_arch_print_tree (MonoInst *tree, int arity)
4908 {
4909         return 0;
4910 }
4911
4912 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4913 {
4914         MonoInst* ins;
4915         
4916         if (appdomain_tls_offset == -1)
4917                 return NULL;
4918
4919         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4920         ins->inst_offset = appdomain_tls_offset;
4921         return ins;
4922 }
4923
4924 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4925 {
4926         MonoInst* ins;
4927
4928         if (thread_tls_offset == -1)
4929                 return NULL;
4930
4931         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4932         ins->inst_offset = thread_tls_offset;
4933         return ins;
4934 }
4935
/*
 * mono_arch_get_patch_offset:
 *
 *   Return the offset, in bytes, of the patchable immediate/displacement
 * inside the instruction starting at CODE. Aborts on instruction patterns
 * the patcher does not emit. Note the two 0x58 cases depend on being
 * checked in this order.
 */
guint32
mono_arch_get_patch_offset (guint8 *code)
{
	if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
		/* mov <REG>, <OFFSET>(<REG>): disp32 follows the modrm byte */
		return 2;
	else if ((code [0] == 0xba))
		/* mov IMM, %edx */
		return 1;
	else if ((code [0] == 0x68))
		/* push IMM */
		return 1;
	else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
		/* push <OFFSET>(<REG>) */
		return 2;
	else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
		/* call *<OFFSET>(<REG>) */
		return 2;
	else if ((code [0] == 0xdd) || (code [0] == 0xd9))
		/* fldl <ADDR> */
		return 2;
	else if ((code [0] == 0x58) && (code [1] == 0x05))
		/* pop %eax; add <OFFSET>, %eax */
		return 2;
	else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
		/* pop <REG>; add <OFFSET>, <REG> */
		return 3;
	else {
		g_assert_not_reached ();
		return -1;
	}
}
4966
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   CODE points just past an indirect call instruction and REGS holds the
 * register values saved at that point. Decode the call and return the
 * address of the memory slot the target was loaded from (e.g. a vtable
 * slot), or NULL for direct (call imm32) calls and unrecognized patterns.
 */
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;
	if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *imm8(<REG>): 0xff /2 with an 8-bit displacement */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *imm32(<REG>): 0xff /2 with a 32-bit displacement */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* call imm32: a direct call, there is no slot to return */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call: should check the above code can't catch it earlier 
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	/* The slot address is the saved base register plus the displacement */
	return (gpointer*)(((gint32)(regs [reg])) + disp);
}
5005
/*
 * mono_arch_get_delegate_method_ptr_addr:
 *
 *   CODE points just past a delegate-invoke sequence of the form
 *     8b /r         mov  <REG>, %eax     (mod=3, register-to-register)
 *     8b 40 <disp8> mov  disp8(%eax), %eax
 *     ff d0         call *%eax
 * REGS holds the saved register values. Return the address the method
 * pointer was loaded from, or NULL when the bytes do not match this
 * pattern, or when the base register was %eax itself (its original value
 * was overwritten by the first mov and cannot be recovered from REGS).
 */
gpointer* 
mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	/* Back up over the 7-byte mov/mov/call sequence described above */
	code -= 7;
	if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
		reg = x86_modrm_rm (code [1]);
		disp = code [4];

		if (reg == X86_EAX)
			return NULL;
		else
			return (gpointer*)(((gint32)(regs [reg])) + disp);
	}

	return NULL;
}