2005-02-26 Zoltan Varga <vargaz@freemail.hu>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #include <mono/metadata/appdomain.h>
16 #include <mono/metadata/debug-helpers.h>
17 #include <mono/metadata/threads.h>
18 #include <mono/metadata/profiler-private.h>
19 #include <mono/utils/mono-math.h>
20
21 #include "trace.h"
22 #include "mini-x86.h"
23 #include "inssel.h"
24 #include "cpu-pentium.h"
25
/* TLS slot offsets for fast access to the LMF, the current appdomain and
 * the current thread; -1 means the offset has not been determined (or TLS
 * access is unsupported on this platform). */
static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

/* Round VAL up to the next multiple of ALIGN; ALIGN must be a power of two. */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

/* Size used for alternate signal stacks */
#define SIGNAL_STACK_SIZE (64 * 1024)

/* Marker for code paths that are not implemented on this architecture */
#define NOT_IMPLEMENTED g_assert_not_reached ()
42
43 const char*
44 mono_arch_regname (int reg) {
45         switch (reg) {
46         case X86_EAX: return "%eax";
47         case X86_EBX: return "%ebx";
48         case X86_ECX: return "%ecx";
49         case X86_EDX: return "%edx";
50         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
51         case X86_EDI: return "%edi";
52         case X86_ESI: return "%esi";
53         }
54         return "unknown";
55 }
56
/* Where a call argument or return value lives according to the
 * calling convention. */
typedef enum {
        ArgInIReg,              /* in an integer register */
        ArgInFloatSSEReg,       /* in an SSE register as single precision */
        ArgInDoubleSSEReg,      /* in an SSE register as double precision */
        ArgOnStack,             /* at a fixed offset on the stack */
        ArgValuetypeInReg,      /* small struct returned in registers (win32 pinvoke) */
        ArgOnFloatFpStack,      /* on the x87 FP stack as single precision */
        ArgOnDoubleFpStack,     /* on the x87 FP stack as double precision */
        ArgNone                 /* no value (void return) */
} ArgStorage;
67
/* Placement information for a single argument or return value. */
typedef struct {
        gint16 offset;          /* stack offset, valid when storage == ArgOnStack */
        gint8  reg;             /* register number, valid for register storage kinds */
        ArgStorage storage;     /* where the value lives */

        /* Only if storage == ArgValuetypeInReg */
        ArgStorage pair_storage [2];    /* placement of the low/high halves */
        gint8 pair_regs [2];            /* registers for the low/high halves */
} ArgInfo;
77
/* Full calling convention description for one signature, computed by
 * get_call_info (). Allocated with trailing space so args [] can hold
 * one entry per argument (including the implicit 'this'). */
typedef struct {
        int nargs;                      /* number of entries in args [] */
        guint32 stack_usage;            /* total stack space used by arguments */
        guint32 reg_usage;              /* number of integer param registers used */
        guint32 freg_usage;             /* number of FP param registers used */
        gboolean need_stack_align;      /* whether extra alignment padding is needed */
        ArgInfo ret;                    /* placement of the return value */
        ArgInfo sig_cookie;             /* placement of the vararg signature cookie */
        ArgInfo args [1];               /* flexible: really nargs entries */
} CallInfo;
88
/* Number of integer registers used for parameter passing: none on x86,
 * every argument goes on the stack. */
#define PARAM_REGS 0

/* Number of SSE registers used for FP parameter passing: none. */
#define FLOAT_PARAM_REGS 0

/* Empty because PARAM_REGS is 0; indexed only when *gr < PARAM_REGS,
 * i.e. never.  NOTE(review): an empty initializer list is a GNU
 * extension, not standard C89/C99 — confirm all supported compilers
 * accept it. */
static X86_Reg_No param_regs [] = { };

#ifdef PLATFORM_WIN32
/* Registers used to return small structs from pinvoke calls under the
 * MS ABI: low word in EAX, high word in EDX (see add_valuetype). */
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
98
99 static void inline
100 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
101 {
102     ainfo->offset = *stack_size;
103
104     if (*gr >= PARAM_REGS) {
105                 ainfo->storage = ArgOnStack;
106                 (*stack_size) += sizeof (gpointer);
107     }
108     else {
109                 ainfo->storage = ArgInIReg;
110                 ainfo->reg = param_regs [*gr];
111                 (*gr) ++;
112     }
113 }
114
115 static void inline
116 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
117 {
118         ainfo->offset = *stack_size;
119
120         g_assert (PARAM_REGS == 0);
121         
122         ainfo->storage = ArgOnStack;
123         (*stack_size) += sizeof (gpointer) * 2;
124 }
125
126 static void inline
127 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
128 {
129     ainfo->offset = *stack_size;
130
131     if (*gr >= FLOAT_PARAM_REGS) {
132                 ainfo->storage = ArgOnStack;
133                 (*stack_size) += sizeof (gpointer);
134     }
135     else {
136                 /* A double register */
137                 if (is_double)
138                         ainfo->storage = ArgInDoubleSSEReg;
139                 else
140                         ainfo->storage = ArgInFloatSSEReg;
141                 ainfo->reg = *gr;
142                 (*gr) += 1;
143     }
144 }
145
146
/*
 * add_valuetype:
 *
 *   Compute the placement of a valuetype argument or return value of
 * type TYPE, storing the result in AINFO.  IS_RETURN selects return
 * value handling; GR/FR/STACK_SIZE track register and stack usage.
 * On win32, small pinvoke struct returns are placed in registers or on
 * the x87 stack; everything else is passed on the stack.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* pinvoke signatures use the native (marshalled) size */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* Structs of 1, 2, 4 or 8 bytes come back in EAX (and EDX for
		 * the high 4 bytes of 8 byte structs). */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* Default: pass on the stack, rounded up to pointer alignment */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
202
203 /*
204  * get_call_info:
205  *
206  *  Obtain information about a call according to the calling convention.
207  * For x86 ELF, see the "System V Application Binary Interface Intel386 
 * Architecture Processor Supplement, Fourth Edition" document for more
209  * information.
210  * For x86 win32, see ???.
211  */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
	/* NOTE(review): is_pinvoke is currently unused; sig->pinvoke is
	 * consulted instead — confirm whether callers rely on that. */
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	/* CallInfo already contains room for one ArgInfo; allocate the rest */
	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mono_type_get_underlying_type (sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			/* Integers, pointers and references come back in EAX */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			/* 64 bit values come back in the EAX:EDX pair; only the
			 * low register is recorded here */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			;
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	/* Vararg call with no explicit arguments: the sig cookie is the
	 * only thing before the implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/* 
			 * Prevent implicit arguments + the sig cookie from being passed 
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		/* byref parameters are passed as pointers */
		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_VALUETYPE:
			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_assert_not_reached ();
		}
	}

	/* Vararg call whose sentinel is at the very end: emit the trailing
	 * sig cookie */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
376
377 /*
378  * mono_arch_get_argument_info:
379  * @csig:  a method signature
380  * @param_count: the number of parameters to consider
381  * @arg_info: an array to store the result infos
382  *
383  * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries. 
385  *
386  * Returns the size of the activation frame.
387  */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, align, pad;
	/* arguments start above the saved EBP and the return address */
	int offset = 8;
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	/* account for the hidden pointer to the returned struct */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* entry 0 holds the size of the implicit arguments */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else
			size = mono_type_stack_size (csig->params [k], &align);

		/* ignore alignment for now */
		align = 1;

		/* pad the previous entry so this argument starts aligned */
		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* the final entry carries the padding needed to reach frame alignment */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
440
/* Machine code for a small cdecl function equivalent to
 * void cpuid_impl (int id, int *eax, int *ebx, int *ecx, int *edx);
 * It executes CPUID with EAX = id and stores the four result registers
 * through the supplied pointers.  Kept as data so it can be copied into
 * executable memory at runtime (see cpuid () below). */
static const guchar cpuid_impl [] = {
	0x55,                		/* push   %ebp */
	0x89, 0xe5,                	/* mov    %esp,%ebp */
	0x53,                		/* push   %ebx */
	0x8b, 0x45, 0x08,             	/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                	/* cpuid   */
	0x50,                		/* push   %eax */
	0x8b, 0x45, 0x10,             	/* mov    0x10(%ebp),%eax */
	0x89, 0x18,                	/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,             	/* mov    0x14(%ebp),%eax */
	0x89, 0x08,                	/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,             	/* mov    0x18(%ebp),%eax */
	0x89, 0x10,                	/* mov    %edx,(%eax) */
	0x58,                		/* pop    %eax */
	0x8b, 0x55, 0x0c,             	/* mov    0xc(%ebp),%edx */
	0x89, 0x02,                	/* mov    %eax,(%edx) */
	0x5b,                		/* pop    %ebx */
	0xc9,                		/* leave   */
	0xc3,                		/* ret     */
};

/* Signature of the code blob above */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
463
/*
 * cpuid:
 *
 *   Execute the CPUID instruction with EAX = ID, storing the four result
 * registers through P_EAX..P_EDX.  Returns 1 on success, 0 when the CPU
 * does not support CPUID.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
	/* Probe for CPUID support: the instruction exists iff the ID bit
	 * (0x200000) of EFLAGS can be toggled. */
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);

	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
508
509 /*
510  * Initialize the cpu to execute managed code.
511  */
void
mono_arch_cpu_init (void)
{
	guint16 fpcw;

	/* spec compliance requires running with double precision */
	/* Read the x87 control word, force the precision control field to
	 * 64 bit double, and write it back. */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	/* re-read so fpcw reflects what the FPU actually accepted */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));

	mono_x86_tramp_init ();
}
526
527 /*
528  * This function returns the optimizations supported on this cpu.
529  */
530 guint32
531 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
532 {
533         int eax, ebx, ecx, edx;
534         guint32 opts = 0;
535         
536         *exclude_mask = 0;
537         /* Feature Flags function, flags returned in EDX. */
538         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
539                 if (edx & (1 << 15)) {
540                         opts |= MONO_OPT_CMOV;
541                         if (edx & 1)
542                                 opts |= MONO_OPT_FCMOV;
543                         else
544                                 *exclude_mask |= MONO_OPT_FCMOV;
545                 } else
546                         *exclude_mask |= MONO_OPT_CMOV;
547         }
548         return opts;
549 }
550
551 /*
 * Determine whether the trap whose info is in SIGINFO is caused by
553  * integer overflow.
554  */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	struct sigcontext *ctx = (struct sigcontext*)sigctx;
	guint8* ip;

	/* Decode the instruction that raised the trap */
	ip = (guint8*)ctx->SC_EIP;

	/* opcode 0xf7 with mod == 3 and reg field == 7 is 'idiv r32' */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_ECX:
			reg = ctx->SC_ECX;
			break;
		case X86_EBX:
			reg = ctx->SC_EBX;
			break;
		default:
			/* NOTE(review): only ECX/EBX divisors are handled; other
			 * registers hit this assert — confirm codegen never emits
			 * idiv with a different register. */
			g_assert_not_reached ();
			reg = -1;
		}

		/* A divisor of -1 means INT_MIN / -1, i.e. overflow rather
		 * than division by zero */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
585
586 static gboolean
587 is_regsize_var (MonoType *t) {
588         if (t->byref)
589                 return TRUE;
590         switch (mono_type_get_underlying_type (t)->type) {
591         case MONO_TYPE_I4:
592         case MONO_TYPE_U4:
593         case MONO_TYPE_I:
594         case MONO_TYPE_U:
595         case MONO_TYPE_PTR:
596                 return TRUE;
597         case MONO_TYPE_OBJECT:
598         case MONO_TYPE_STRING:
599         case MONO_TYPE_CLASS:
600         case MONO_TYPE_SZARRAY:
601         case MONO_TYPE_ARRAY:
602                 return TRUE;
603         case MONO_TYPE_VALUETYPE:
604                 return FALSE;
605         }
606         return FALSE;
607 }
608
/* Collect the variables of CFG that are candidates for integer register
 * allocation, sorted by mono_varlist_sort. */
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		/* skip dead, volatile or address-taken vars, and anything that
		 * is not a local or an argument */
		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we dont allocate I1 to registers because there is no simply way to sign extend 
		 * 8bit quantities in caller saved registers on x86 */
		if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
		    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
		    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
642
643 GList *
644 mono_arch_get_global_int_regs (MonoCompile *cfg)
645 {
646         GList *regs = NULL;
647
648         /* we can use 3 registers for global allocation */
649         regs = g_list_prepend (regs, (gpointer)X86_EBX);
650         regs = g_list_prepend (regs, (gpointer)X86_ESI);
651         regs = g_list_prepend (regs, (gpointer)X86_EDI);
652
653         return regs;
654 }
655
656 /*
657  * mono_arch_regalloc_cost:
658  *
659  *  Return the cost, in number of memory references, of the action of 
660  * allocating the variable VMV into a register during global register
661  * allocation.
662  */
663 guint32
664 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
665 {
666         MonoInst *ins = cfg->varinfo [vmv->idx];
667
668         if (cfg->method->save_lmf)
669                 /* The register is already saved */
670                 return (ins->opcode == OP_ARG) ? 1 : 0;
671         else
672                 /* push+pop+possible load if it is an argument */
673                 return (ins->opcode == OP_ARG) ? 3 : 2;
674 }
675  
676 /*
677  * Set var information according to the calling convention. X86 version.
678  * The locals var stuff should most likely be split in another method.
679  */
void
mono_arch_allocate_vars (MonoCompile *m)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset, curinst, size, align;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (m->method);
	sig = mono_method_signature (m->method);

	/* incoming arguments start above the saved EBP and return address */
	offset = 8;
	curinst = 0;

	cinfo = get_call_info (sig, FALSE);

	/* place the return value */
	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* the caller passed a hidden pointer to the return buffer */
		m->ret->opcode = OP_REGOFFSET;
		m->ret->inst_basereg = X86_EBP;
		m->ret->inst_offset = offset;
		offset += sizeof (gpointer);
		break;
	case ArgValuetypeInReg:
		/* handled below, after the register save area is sized */
		break;
	case ArgInIReg:
		m->ret->opcode = OP_REGVAR;
		m->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	/* the implicit 'this' argument */
	if (sig->hasthis) {
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		offset += sizeof (gpointer);
		curinst++;
	}

	/* the vararg signature cookie sits right after 'this' */
	if (sig->call_convention == MONO_CALL_VARARG) {
		m->sig_cookie = offset;
		offset += sizeof (gpointer);
	}

	/* explicit arguments, each rounded up to a 4 byte multiple */
	for (i = 0; i < sig->param_count; ++i) {
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		size = mono_type_size (sig->params [i], &align);
		size += 4 - 1;
		size &= ~(4 - 1);
		offset += size;
		curinst++;
	}

	/* from here on, offset counts downward-growing frame space */
	offset = 0;

	/* reserve space to save LMF and caller saved registers */

	if (m->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (m->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		m->ret->opcode = OP_REGOFFSET;
		m->ret->inst_basereg = X86_EBP;
		m->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (m, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* round up to the alignment required by the locals area */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = m->locals_start; i < m->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = m->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;

	/* pad the whole frame to MONO_ARCH_FRAME_ALIGNMENT */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	g_free (cinfo);

	/* change sign? */
	m->stack_offset = -offset;
}
808
809 void
810 mono_arch_create_vars (MonoCompile *cfg)
811 {
812         MonoMethodSignature *sig;
813         CallInfo *cinfo;
814
815         sig = mono_method_signature (cfg->method);
816
817         cinfo = get_call_info (sig, FALSE);
818
819         if (cinfo->ret.storage == ArgValuetypeInReg)
820                 cfg->ret_var_is_local = TRUE;
821
822         g_free (cinfo);
823 }
824
825 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
826  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
827  */
828
829 /* 
830  * take the arguments and generate the arch-specific
831  * instructions to properly call the function in call.
832  * This includes pushing, moving arguments to the right register
833  * etc.
834  * Issue: who does the spilling if needed, and when?
835  */
836 MonoCallInst*
837 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
838         MonoInst *arg, *in;
839         MonoMethodSignature *sig;
840         int i, n, stack_size, type;
841         MonoType *ptype;
842         CallInfo *cinfo;
843
844         stack_size = 0;
845         /* add the vararg cookie before the non-implicit args */
846         if (call->signature->call_convention == MONO_CALL_VARARG) {
847                 MonoInst *sig_arg;
848                 /* FIXME: Add support for signature tokens to AOT */
849                 cfg->disable_aot = TRUE;
850                 MONO_INST_NEW (cfg, arg, OP_OUTARG);
851                 MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
852                 sig_arg->inst_p0 = call->signature;
853                 arg->inst_left = sig_arg;
854                 arg->type = STACK_PTR;
855                 /* prepend, so they get reversed */
856                 arg->next = call->out_args;
857                 call->out_args = arg;
858                 stack_size += sizeof (gpointer);
859         }
860         sig = call->signature;
861         n = sig->param_count + sig->hasthis;
862
863         cinfo = get_call_info (sig, FALSE);
864
865         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
866                 if (cinfo->ret.storage == ArgOnStack)
867                         stack_size += sizeof (gpointer);
868         }
869
870         for (i = 0; i < n; ++i) {
871                 if (is_virtual && i == 0) {
872                         /* the argument will be attached to the call instrucion */
873                         in = call->args [i];
874                         stack_size += 4;
875                 } else {
876                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
877                         in = call->args [i];
878                         arg->cil_code = in->cil_code;
879                         arg->inst_left = in;
880                         arg->type = in->type;
881                         /* prepend, so they get reversed */
882                         arg->next = call->out_args;
883                         call->out_args = arg;
884                         if (i >= sig->hasthis) {
885                                 MonoType *t = sig->params [i - sig->hasthis];
886                                 ptype = mono_type_get_underlying_type (t);
887                                 if (t->byref)
888                                         type = MONO_TYPE_U;
889                                 else
890                                         type = ptype->type;
891                                 /* FIXME: validate arguments... */
892                                 switch (type) {
893                                 case MONO_TYPE_I:
894                                 case MONO_TYPE_U:
895                                 case MONO_TYPE_BOOLEAN:
896                                 case MONO_TYPE_CHAR:
897                                 case MONO_TYPE_I1:
898                                 case MONO_TYPE_U1:
899                                 case MONO_TYPE_I2:
900                                 case MONO_TYPE_U2:
901                                 case MONO_TYPE_I4:
902                                 case MONO_TYPE_U4:
903                                 case MONO_TYPE_STRING:
904                                 case MONO_TYPE_CLASS:
905                                 case MONO_TYPE_OBJECT:
906                                 case MONO_TYPE_PTR:
907                                 case MONO_TYPE_FNPTR:
908                                 case MONO_TYPE_ARRAY:
909                                 case MONO_TYPE_SZARRAY:
910                                         stack_size += 4;
911                                         break;
912                                 case MONO_TYPE_I8:
913                                 case MONO_TYPE_U8:
914                                         stack_size += 8;
915                                         break;
916                                 case MONO_TYPE_R4:
917                                         stack_size += 4;
918                                         arg->opcode = OP_OUTARG_R4;
919                                         break;
920                                 case MONO_TYPE_R8:
921                                         stack_size += 8;
922                                         arg->opcode = OP_OUTARG_R8;
923                                         break;
924                                 case MONO_TYPE_VALUETYPE: {
925                                         int size;
926                                         if (sig->pinvoke) 
927                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
928                                         else 
929                                                 size = mono_type_stack_size (&in->klass->byval_arg, NULL);
930
931                                         stack_size += size;
932                                         arg->opcode = OP_OUTARG_VT;
933                                         arg->klass = in->klass;
934                                         arg->unused = sig->pinvoke;
935                                         arg->inst_imm = size; 
936                                         break;
937                                 }
938                                 case MONO_TYPE_TYPEDBYREF:
939                                         stack_size += sizeof (MonoTypedRef);
940                                         arg->opcode = OP_OUTARG_VT;
941                                         arg->klass = in->klass;
942                                         arg->unused = sig->pinvoke;
943                                         arg->inst_imm = sizeof (MonoTypedRef); 
944                                         break;
945                                 default:
946                                         g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
947                                 }
948                         } else {
949                                 /* the this argument */
950                                 stack_size += 4;
951                         }
952                 }
953         }
954
955         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
956                 if (cinfo->ret.storage == ArgValuetypeInReg) {
957                         MonoInst *zero_inst;
958                         /*
959                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
960                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
961                          * before calling the function. So we add a dummy instruction to represent pushing the 
962                          * struct return address to the stack. The return address will be saved to this stack slot 
963                          * by the code emitted in this_vret_args.
964                          */
965                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
966                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
967                         zero_inst->inst_p0 = 0;
968                         arg->inst_left = zero_inst;
969                         arg->type = STACK_PTR;
970                         /* prepend, so they get reversed */
971                         arg->next = call->out_args;
972                         call->out_args = arg;
973                 }
974                 else
975                         /* if the function returns a struct, the called method already does a ret $0x4 */
976                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
977                                 stack_size -= 4;
978         }
979
980         call->stack_usage = stack_size;
981         g_free (cinfo);
982
983         /* 
984          * should set more info in call, such as the stack space
985          * used by the args that needs to be added back to esp
986          */
987
988         return call;
989 }
990
991 /*
992  * Allow tracing to work with this interface (with an optional argument)
993  */
994 void*
995 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
996 {
997         guchar *code = p;
998
999         /* if some args are passed in registers, we need to save them here */
1000         x86_push_reg (code, X86_EBP);
1001
1002         if (cfg->compile_aot) {
1003                 x86_push_imm (code, cfg->method);
1004                 x86_mov_reg_imm (code, X86_EAX, func);
1005                 x86_call_reg (code, X86_EAX);
1006         } else {
1007                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1008                 x86_push_imm (code, cfg->method);
1009                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1010                 x86_call_code (code, 0);
1011         }
1012         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1013
1014         return code;
1015 }
1016
/* Which return-value registers must be preserved around the call emitted
 * by mono_arch_instrument_epilog below. */
enum {
	SAVE_NONE,	/* void return: nothing to save */
	SAVE_STRUCT,	/* value type returned via hidden pointer arg */
	SAVE_EAX,	/* word-sized result in EAX */
	SAVE_EAX_EDX,	/* 64-bit result in the EAX:EDX pair */
	SAVE_FP		/* floating point result on the x87 stack */
};
1024
1025 void*
1026 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1027 {
1028         guchar *code = p;
1029         int arg_size = 0, save_mode = SAVE_NONE;
1030         MonoMethod *method = cfg->method;
1031         
1032         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1033         case MONO_TYPE_VOID:
1034                 /* special case string .ctor icall */
1035                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1036                         save_mode = SAVE_EAX;
1037                 else
1038                         save_mode = SAVE_NONE;
1039                 break;
1040         case MONO_TYPE_I8:
1041         case MONO_TYPE_U8:
1042                 save_mode = SAVE_EAX_EDX;
1043                 break;
1044         case MONO_TYPE_R4:
1045         case MONO_TYPE_R8:
1046                 save_mode = SAVE_FP;
1047                 break;
1048         case MONO_TYPE_VALUETYPE:
1049                 save_mode = SAVE_STRUCT;
1050                 break;
1051         default:
1052                 save_mode = SAVE_EAX;
1053                 break;
1054         }
1055
1056         switch (save_mode) {
1057         case SAVE_EAX_EDX:
1058                 x86_push_reg (code, X86_EDX);
1059                 x86_push_reg (code, X86_EAX);
1060                 if (enable_arguments) {
1061                         x86_push_reg (code, X86_EDX);
1062                         x86_push_reg (code, X86_EAX);
1063                         arg_size = 8;
1064                 }
1065                 break;
1066         case SAVE_EAX:
1067                 x86_push_reg (code, X86_EAX);
1068                 if (enable_arguments) {
1069                         x86_push_reg (code, X86_EAX);
1070                         arg_size = 4;
1071                 }
1072                 break;
1073         case SAVE_FP:
1074                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1075                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1076                 if (enable_arguments) {
1077                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1078                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1079                         arg_size = 8;
1080                 }
1081                 break;
1082         case SAVE_STRUCT:
1083                 if (enable_arguments) {
1084                         x86_push_membase (code, X86_EBP, 8);
1085                         arg_size = 4;
1086                 }
1087                 break;
1088         case SAVE_NONE:
1089         default:
1090                 break;
1091         }
1092
1093         if (cfg->compile_aot) {
1094                 x86_push_imm (code, method);
1095                 x86_mov_reg_imm (code, X86_EAX, func);
1096                 x86_call_reg (code, X86_EAX);
1097         } else {
1098                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1099                 x86_push_imm (code, method);
1100                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1101                 x86_call_code (code, 0);
1102         }
1103         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1104
1105         switch (save_mode) {
1106         case SAVE_EAX_EDX:
1107                 x86_pop_reg (code, X86_EAX);
1108                 x86_pop_reg (code, X86_EDX);
1109                 break;
1110         case SAVE_EAX:
1111                 x86_pop_reg (code, X86_EAX);
1112                 break;
1113         case SAVE_FP:
1114                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1115                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1116                 break;
1117         case SAVE_NONE:
1118         default:
1119                 break;
1120         }
1121
1122         return code;
1123 }
1124
/* Emit a conditional branch for INS.  If the target (label or basic block)
 * has already been emitted, branch directly to its native offset; otherwise
 * record a patch and emit a branch with a placeholder displacement, using
 * the short (8-bit) form when MONO_OPT_BRANCH is on and the estimated
 * distance fits in an imm8. */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1149
/* Emit a conditional branch to the EXC_NAME exception-throwing code if the
 * condition fails; the branch target is resolved later via the recorded
 * MONO_PATCH_INFO_EXC patch.
 * NOTE(review): the trailing ';' after "while (0)" defeats the usual
 * do-while(0) idiom — an invocation inside an unbraced if/else could
 * mis-parse.  Confirm all call sites before removing it. */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
                x86_branch32 (code, cond, 0, signed);               \
        } while (0); 
1157
/* Compare and pop the two top x87 stack entries, then store the FPU status
 * word into AX so the result can be tested with integer instructions.
 * NOTE(review): same trailing-';' caveat as EMIT_COND_SYSTEM_EXCEPTION. */
#define EMIT_FPCOMPARE(code) do { \
        x86_fcompp (code); \
        x86_fnstsw (code); \
} while (0); 
1162
1163
1164 static guint8*
1165 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1166 {
1167         if (cfg->compile_aot) {
1168                 guint32 got_reg = X86_EAX;
1169
1170                 if (cfg->compile_aot) {          
1171                         /*
1172                          * Since the patches are generated by the back end, there is
1173                          * no way to generate a got_var at this point.
1174                          */
1175                         g_assert (cfg->got_var);
1176
1177                         if (cfg->got_var->opcode == OP_REGOFFSET)
1178                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1179                         else
1180                                 got_reg = cfg->got_var->dreg;
1181                 }
1182
1183                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1184                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1185         }
1186         else {
1187                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1188                 x86_call_code (code, 0);
1189         }
1190
1191         return code;
1192 }
1193
/* FIXME: Add more instructions */
/* True if INS does not read the condition flags, i.e. it is safe for the
 * preceding instruction to be replaced by a flag-clobbering one (used by
 * the mov reg,0 -> xor reg,reg peephole below). */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1196
/*
 * peephole_pass:
 *
 *   Local peephole optimization over the basic block's singly-linked
 * instruction list: fold redundant load-after-store / load-after-load
 * pairs, turn compares against 0 into test-null, and drop no-op moves
 * and multiplications by 1.  last_ins tracks the previous surviving
 * instruction so that a matched instruction can be unlinked in place.
 */
static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *last_ins = NULL;
	ins = bb->code;

	while (ins) {

		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we cant do it always */
			if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
				ins->opcode = CEE_XOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
			}
			break;
		case OP_MUL_IMM: 
			/* remove unnecessary multiplication with 1 */
			if (ins->inst_imm == 1) {
				if (ins->dreg != ins->sreg1) {
					ins->opcode = OP_MOVE;
				} else {
					/* NOTE(review): assumes last_ins != NULL here, i.e.
					 * OP_MUL_IMM is never the first instruction of a
					 * block — confirm, otherwise this dereferences NULL */
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				}
			}
			break;
		case OP_COMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0) 
			 * --> 
			 * OP_X86_TEST_NULL (reg) 
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/* 
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
					ins->opcode = OP_COMPARE_IMM;
					ins->sreg1 = last_ins->sreg1;

					/* check if we can remove cmp reg,0 with test null */
					if (!ins->inst_imm)
						ins->opcode = OP_X86_TEST_NULL;
				}

			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI4_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
					 || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;				
					ins = ins->next;				
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}

			/* 
			 * Note: reg1 must be different from the basereg in the second load
			 * Note: if reg1 = reg2 is equal then second load is removed
			 *
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_MOVE reg1, reg2
			 */
			/* NOTE(review): "} if" (no else) is intentional-looking but
			 * unusual; the second test cannot match after the first one
			 * fired, since last_ins is then a store opcode. */
			} if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
					   || last_ins->opcode == OP_LOAD_MEMBASE) &&
			      ins->inst_basereg != last_ins->dreg &&
			      ins->inst_basereg == last_ins->inst_basereg &&
			      ins->inst_offset == last_ins->inst_offset) {

				if (ins->dreg == last_ins->dreg) {
					last_ins->next = ins->next;				
					ins = ins->next;				
					continue;
				} else {
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->dreg;
				}

				//g_assert_not_reached ();

#if 0
			/* 
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg
			 * -->
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_ICONST reg, imm
			 */
			} else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
						|| last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
				   ins->inst_basereg == last_ins->inst_destbasereg &&
				   ins->inst_offset == last_ins->inst_offset) {
				//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
				ins->opcode = OP_ICONST;
				ins->inst_c0 = last_ins->inst_imm;
				g_assert_not_reached (); // check this rule
#endif
			}
			break;
		case OP_LOADU1_MEMBASE:
		case OP_LOADI1_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;				
					ins = ins->next;				
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}
			}
			break;
		case OP_LOADU2_MEMBASE:
		case OP_LOADI2_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;				
					ins = ins->next;				
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}
			}
			break;
		case CEE_CONV_I4:
		case CEE_CONV_U4:
		case OP_MOVE:
			/*
			 * Removes:
			 *
			 * OP_MOVE reg, reg 
			 */
			if (ins->dreg == ins->sreg1) {
				if (last_ins)
					last_ins->next = ins->next;				
				ins = ins->next;
				continue;
			}
			/* 
			 * Removes:
			 *
			 * OP_MOVE sreg, dreg 
			 * OP_MOVE dreg, sreg
			 */
			if (last_ins && last_ins->opcode == OP_MOVE &&
			    ins->sreg1 == last_ins->dreg &&
			    ins->dreg == last_ins->sreg1) {
				last_ins->next = ins->next;				
				ins = ins->next;				
				continue;
			}
			break;
			
		case OP_X86_PUSH_MEMBASE:
			/* push of a just-stored slot -> push the register directly */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				    ins->opcode = OP_X86_PUSH;
				    ins->sreg1 = last_ins->sreg1;
			}
			break;
		}
		last_ins = ins;
		ins = ins->next;
	}
	bb->last_ins = last_ins;
}
1422
/* x86 condition codes indexed by branch opcode; rows are
 * {EQ,GE,GT,LE,LT}, {NE,GE,GT,LE,LT}, then overflow/carry tests.
 * NOTE(review): presumably indexed by the branch opcode's offset within
 * its opcode group, with signedness passed separately to x86_branch —
 * confirm against the opcode numbering and the users of this table. */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
1429
/* Execute A only when the verbose level exceeds 1; uncomment the empty
 * definition below to compile the debug output away entirely. */
#define DEBUG(a) if (cfg->verbose_level > 1) a
//#define DEBUG(a)
1432
1433 /*
1434  * returns the offset used by spillvar. It allocates a new
1435  * spill variable if necessary. 
1436  */
1437 static int
1438 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1439 {
1440         MonoSpillInfo **si, *info;
1441         int i = 0;
1442
1443         si = &cfg->spill_info; 
1444         
1445         while (i <= spillvar) {
1446
1447                 if (!*si) {
1448                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1449                         info->next = NULL;
1450                         cfg->stack_offset -= sizeof (gpointer);
1451                         info->offset = cfg->stack_offset;
1452                 }
1453
1454                 if (i == spillvar)
1455                         return (*si)->offset;
1456
1457                 i++;
1458                 si = &(*si)->next;
1459         }
1460
1461         g_assert_not_reached ();
1462         return 0;
1463 }
1464
1465 /*
1466  * returns the offset used by spillvar. It allocates a new
1467  * spill float variable if necessary. 
1468  * (same as mono_spillvar_offset but for float)
1469  */
1470 static int
1471 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1472 {
1473         MonoSpillInfo **si, *info;
1474         int i = 0;
1475
1476         si = &cfg->spill_info_float; 
1477         
1478         while (i <= spillvar) {
1479
1480                 if (!*si) {
1481                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1482                         info->next = NULL;
1483                         cfg->stack_offset -= sizeof (double);
1484                         info->offset = cfg->stack_offset;
1485                 }
1486
1487                 if (i == spillvar)
1488                         return (*si)->offset;
1489
1490                 i++;
1491                 si = &(*si)->next;
1492         }
1493
1494         g_assert_not_reached ();
1495         return 0;
1496 }
1497
1498 /*
1499  * Creates a store for spilled floating point items
1500  */
1501 static MonoInst*
1502 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1503 {
1504         MonoInst *store;
1505         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1506         store->sreg1 = reg;
1507         store->inst_destbasereg = X86_EBP;
1508         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1509
1510         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08x(%%sp)) (from %d)\n", spill, store->inst_offset, reg));
1511         return store;
1512 }
1513
1514 /*
1515  * Creates a load for spilled floating point items 
1516  */
1517 static MonoInst*
1518 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1519 {
1520         MonoInst *load;
1521         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1522         load->dreg = reg;
1523         load->inst_basereg = X86_EBP;
1524         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1525
1526         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08x(%%sp)) (from %d)\n", spill, load->inst_offset, reg));
1527         return load;
1528 }
1529
/* Classify hardware integer registers for the local allocator.
 * NOTE(review): assumes X86_IS_CALLEE() is true for registers the callee
 * may clobber (caller-saved), so "global" = surviving calls and
 * "freeable" = freely reusable — confirm against mini-x86.h. */
#define is_global_ireg(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && !X86_IS_CALLEE ((r)))
#define reg_is_freeable(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && X86_IS_CALLEE ((r)))
1532
/*
 * Per-register liveness information collected by the forward scan in
 * mono_arch_local_regalloc (). Indexed by (virtual or hard) register number;
 * instruction indices start at 1, so 0 means "never seen".
 */
typedef struct {
        int born_in;            /* index of the earliest instruction defining the reg */
        int killed_in;          /* index of the last instruction writing the reg */
        int last_use;           /* index of the most recent use seen */
        int prev_use;           /* the use seen before last_use (0 if none) */
        int flags;              /* used to track fp spill/load */
} RegTrack;
1540
/* Per-opcode descriptions (dest/src1/src2/clobber specifiers) for the pentium */
static const char*const * ins_spec = pentium_desc;
1542
1543 static void
1544 print_ins (int i, MonoInst *ins)
1545 {
1546         const char *spec = ins_spec [ins->opcode];
1547         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1548         if (spec [MONO_INST_DEST]) {
1549                 if (ins->dreg >= MONO_MAX_IREGS)
1550                         g_print (" R%d <-", ins->dreg);
1551                 else
1552                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1553         }
1554         if (spec [MONO_INST_SRC1]) {
1555                 if (ins->sreg1 >= MONO_MAX_IREGS)
1556                         g_print (" R%d", ins->sreg1);
1557                 else
1558                         g_print (" %s", mono_arch_regname (ins->sreg1));
1559         }
1560         if (spec [MONO_INST_SRC2]) {
1561                 if (ins->sreg2 >= MONO_MAX_IREGS)
1562                         g_print (" R%d", ins->sreg2);
1563                 else
1564                         g_print (" %s", mono_arch_regname (ins->sreg2));
1565         }
1566         if (spec [MONO_INST_CLOB])
1567                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1568         g_print ("\n");
1569 }
1570
1571 static void
1572 print_regtrack (RegTrack *t, int num)
1573 {
1574         int i;
1575         char buf [32];
1576         const char *r;
1577         
1578         for (i = 0; i < num; ++i) {
1579                 if (!t [i].born_in)
1580                         continue;
1581                 if (i >= MONO_MAX_IREGS) {
1582                         g_snprintf (buf, sizeof(buf), "R%d", i);
1583                         r = buf;
1584                 } else
1585                         r = mono_arch_regname (i);
1586                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1587         }
1588 }
1589
typedef struct InstList InstList;

/*
 * Doubly-linked list node used to process the basic block's instructions in
 * reverse order (built by prepending in inst_list_prepend, so from the head:
 * 'next' walks towards earlier instructions, 'prev' towards later ones).
 */
struct InstList {
        InstList *prev;         /* node for the following instruction in program order */
        InstList *next;         /* node for the preceding instruction in program order */
        MonoInst *data;         /* the instruction itself */
};
1597
1598 static inline InstList*
1599 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1600 {
1601         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1602         item->data = data;
1603         item->prev = NULL;
1604         item->next = list;
1605         if (list)
1606                 list->prev = item;
1607         return item;
1608 }
1609
/*
 * Force the spilling of the variable in the symbolic register 'reg'.
 *
 * The hard register currently assigned to 'reg' is freed, 'reg' is marked as
 * spilled (encoded as a negative iassign value < -1), and a reload from the
 * new spill slot is inserted right after the current instruction, so the
 * value is live again for the already-processed (later) instructions.
 * Returns the hard register that was freed (and immediately re-reserved for
 * the caller).
 */
static int
get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
{
        MonoInst *load;
        int i, sel, spill;
        
        sel = cfg->rs->iassign [reg];
        /*i = cfg->rs->isymbolic [sel];
        g_assert (i == reg);*/
        i = reg;
        /* encode the spill index into the symbolic reg's assignment: -spill - 1 */
        spill = ++cfg->spill_count;
        cfg->rs->iassign [i] = -spill - 1;
        mono_regstate_free_int (cfg->rs, sel);
        /* we need to create a spill var and insert a load to sel after the current instruction */
        MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
        load->dreg = sel;
        load->inst_basereg = X86_EBP;
        load->inst_offset = mono_spillvar_offset (cfg, spill);
        if (item->prev) {
                /* 'item->prev' is the next instruction in program order: advance
                 * 'ins' so the load is spliced in just before it */
                while (ins->next != item->prev->data)
                        ins = ins->next;
        }
        load->next = ins->next;
        ins->next = load;
        DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
        /* re-reserve the freed hard reg so the caller can use it right away */
        i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
        g_assert (i == sel);

        return sel;
}
1643
/*
 * get_register_spilling:
 *
 *   Free a hard register from 'regmask' so it can be assigned to symbolic
 * register 'reg'. Registers used by the current instruction's operands are
 * excluded from the candidates; the first remaining candidate is taken, its
 * current symbolic owner is marked as spilled, and a reload from the spill
 * slot is inserted after 'ins' (so later instructions, already processed by
 * the backwards pass, still see the value).
 * Returns the freed hard register.
 */
static int
get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
{
        MonoInst *load;
        int i, sel, spill;

        DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
        /* exclude the registers in the current instruction */
        if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
                if (ins->sreg1 >= MONO_MAX_IREGS)
                        regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
                else
                        regmask &= ~ (1 << ins->sreg1);
                DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
        }
        if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
                if (ins->sreg2 >= MONO_MAX_IREGS)
                        regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
                else
                        regmask &= ~ (1 << ins->sreg2);
                DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
        }
        if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
                regmask &= ~ (1 << ins->dreg);
                DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
        }

        DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
        g_assert (regmask); /* need at least a register we can free */
        sel = -1;
        /* we should track prev_use and spill the register that's farther */
        for (i = 0; i < MONO_MAX_IREGS; ++i) {
                if (regmask & (1 << i)) {
                        sel = i;
                        DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
                        break;
                }
        }
        /* mark the victim's symbolic reg as spilled (encoded as -spill - 1) */
        i = cfg->rs->isymbolic [sel];
        spill = ++cfg->spill_count;
        cfg->rs->iassign [i] = -spill - 1;
        mono_regstate_free_int (cfg->rs, sel);
        /* we need to create a spill var and insert a load to sel after the current instruction */
        MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
        load->dreg = sel;
        load->inst_basereg = X86_EBP;
        load->inst_offset = mono_spillvar_offset (cfg, spill);
        if (item->prev) {
                /* 'item->prev' is the next instruction in program order: advance
                 * 'ins' so the load is spliced in just before it */
                while (ins->next != item->prev->data)
                        ins = ins->next;
        }
        load->next = ins->next;
        ins->next = load;
        DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
        /* re-reserve the freed hard reg so the caller can use it right away */
        i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
        g_assert (i == sel);
        
        return sel;
}
1703
1704 static MonoInst*
1705 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1706 {
1707         MonoInst *copy;
1708         MONO_INST_NEW (cfg, copy, OP_MOVE);
1709         copy->dreg = dest;
1710         copy->sreg1 = src;
1711         if (ins) {
1712                 copy->next = ins->next;
1713                 ins->next = copy;
1714         }
1715         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1716         return copy;
1717 }
1718
1719 static MonoInst*
1720 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1721 {
1722         MonoInst *store;
1723         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1724         store->sreg1 = reg;
1725         store->inst_destbasereg = X86_EBP;
1726         store->inst_offset = mono_spillvar_offset (cfg, spill);
1727         if (ins) {
1728                 store->next = ins->next;
1729                 ins->next = store;
1730         }
1731         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1732         return store;
1733 }
1734
1735 static void
1736 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1737 {
1738         MonoInst *prev;
1739         if (item->next) {
1740                 prev = item->next->data;
1741
1742                 while (prev->next != ins)
1743                         prev = prev->next;
1744                 to_insert->next = ins;
1745                 prev->next = to_insert;
1746         } else {
1747                 to_insert->next = ins;
1748         }
1749         /* 
1750          * needed otherwise in the next instruction we can add an ins to the 
1751          * end and that would get past this instruction.
1752          */
1753         item->data = to_insert; 
1754 }
1755
1756
#if  0
/*
 * alloc_int_reg: DISABLED code, kept for reference only (superseded by
 * mono_x86_alloc_int_reg below).
 *
 * Assign a hard register from 'allow_mask' to symbolic register 'sym_reg',
 * spilling another register when none is free, and emitting a spill store
 * before the instruction when 'sym_reg' was itself marked as spilled.
 */
static int
alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
{
        int val = cfg->rs->iassign [sym_reg];
        if (val < 0) {
                int spill = 0;
                if (val < -1) {
                        /* the register gets spilled after this inst */
                        spill = -val -1;
                }
                val = mono_regstate_alloc_int (cfg->rs, allow_mask);
                if (val < 0)
                        val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
                cfg->rs->iassign [sym_reg] = val;
                /* add option to store before the instruction for src registers */
                if (spill)
                        create_spilled_store (cfg, spill, val, sym_reg, ins);
        }
        cfg->rs->isymbolic [val] = sym_reg;
        return val;
}
#endif
1780
/* flags used in reginfo->flags */
enum {
        MONO_X86_FP_NEEDS_LOAD_SPILL    = 1 << 0,       /* fp source must be reloaded and spilled again (fp stack full) */
        MONO_X86_FP_NEEDS_SPILL                 = 1 << 1,       /* fp dest would overflow the fp stack: spill it */
        MONO_X86_FP_NEEDS_LOAD                  = 1 << 2,       /* fp source must be reloaded from a spill slot */
        MONO_X86_REG_NOT_ECX                    = 1 << 3,       /* avoid ECX (set on sreg1 of shift opcodes) */
        MONO_X86_REG_EAX                                = 1 << 4,       /* prefer EAX (low word of a long pair) */
        MONO_X86_REG_EDX                                = 1 << 5,       /* prefer EDX (high word of a long pair) */
        MONO_X86_REG_ECX                                = 1 << 6        /* prefer ECX (shift count operand) */
};
1791
1792 static int
1793 mono_x86_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1794 {
1795         int val;
1796         int test_mask = dest_mask;
1797
1798         if (flags & MONO_X86_REG_EAX)
1799                 test_mask &= (1 << X86_EAX);
1800         else if (flags & MONO_X86_REG_EDX)
1801                 test_mask &= (1 << X86_EDX);
1802         else if (flags & MONO_X86_REG_ECX)
1803                 test_mask &= (1 << X86_ECX);
1804         else if (flags & MONO_X86_REG_NOT_ECX)
1805                 test_mask &= ~ (1 << X86_ECX);
1806
1807         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1808         if (val >= 0 && test_mask != dest_mask)
1809                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1810
1811         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1812                 DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but exluding ECX\n", test_mask));
1813                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & (~1 << X86_ECX)));
1814         }
1815
1816         if (val < 0) {
1817                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1818                 if (val < 0)
1819                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1820         }
1821
1822         return val;
1823 }
1824
1825 static inline void
1826 assign_ireg (MonoRegState *rs, int reg, int hreg)
1827 {
1828         g_assert (reg >= MONO_MAX_IREGS);
1829         g_assert (hreg < MONO_MAX_IREGS);
1830         g_assert (! is_global_ireg (hreg));
1831
1832         rs->iassign [reg] = hreg;
1833         rs->isymbolic [hreg] = reg;
1834         rs->ifree_mask &= ~ (1 << hreg);
1835 }
1836
1837 /*#include "cprop.c"*/
1838
1839 /*
1840  * Local register allocation.
1841  * We first scan the list of instructions and we save the liveness info of
 * each register (when the register is first used, when its value is set etc.).
1843  * We also reverse the list of instructions (in the InstList list) because assigning
1844  * registers backwards allows for more tricks to be used.
1845  */
1846 void
1847 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1848 {
1849         MonoInst *ins;
1850         MonoRegState *rs = cfg->rs;
1851         int i, val, fpcount;
1852         RegTrack *reginfo, *reginfof;
1853         RegTrack *reginfo1, *reginfo2, *reginfod;
1854         InstList *tmp, *reversed = NULL;
1855         const char *spec;
1856         guint32 src1_mask, src2_mask, dest_mask;
1857         GList *fspill_list = NULL;
1858         int fspill = 0;
1859
1860         if (!bb->code)
1861                 return;
1862         rs->next_vireg = bb->max_ireg;
1863         rs->next_vfreg = bb->max_freg;
1864         mono_regstate_assign (rs);
1865         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1866         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1867         rs->ifree_mask = X86_CALLEE_REGS;
1868
1869         ins = bb->code;
1870
1871         /*if (cfg->opt & MONO_OPT_COPYPROP)
1872                 local_copy_prop (cfg, ins);*/
1873
1874         i = 1;
1875         fpcount = 0;
1876         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1877         /* forward pass on the instructions to collect register liveness info */
1878         while (ins) {
1879                 spec = ins_spec [ins->opcode];
1880                 
1881                 DEBUG (print_ins (i, ins));
1882
1883                 if (spec [MONO_INST_SRC1]) {
1884                         if (spec [MONO_INST_SRC1] == 'f') {
1885                                 GList *spill;
1886                                 reginfo1 = reginfof;
1887
1888                                 spill = g_list_first (fspill_list);
1889                                 if (spill && fpcount < MONO_MAX_FREGS) {
1890                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1891                                         fspill_list = g_list_remove (fspill_list, spill->data);
1892                                 } else
1893                                         fpcount--;
1894                         }
1895                         else
1896                                 reginfo1 = reginfo;
1897                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1898                         reginfo1 [ins->sreg1].last_use = i;
1899                         if (spec [MONO_INST_SRC1] == 'L') {
1900                                 /* The virtual register is allocated sequentially */
1901                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1902                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1903                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1904                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1905
1906                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1907                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1908                         }
1909                 } else {
1910                         ins->sreg1 = -1;
1911                 }
1912                 if (spec [MONO_INST_SRC2]) {
1913                         if (spec [MONO_INST_SRC2] == 'f') {
1914                                 GList *spill;
1915                                 reginfo2 = reginfof;
1916                                 spill = g_list_first (fspill_list);
1917                                 if (spill) {
1918                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1919                                         fspill_list = g_list_remove (fspill_list, spill->data);
1920                                         if (fpcount >= MONO_MAX_FREGS) {
1921                                                 fspill++;
1922                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1923                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1924                                         }
1925                                 } else
1926                                         fpcount--;
1927                         }
1928                         else
1929                                 reginfo2 = reginfo;
1930                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1931                         reginfo2 [ins->sreg2].last_use = i;
1932                         if (spec [MONO_INST_SRC2] == 'L') {
1933                                 /* The virtual register is allocated sequentially */
1934                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1935                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1936                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1937                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1938                         }
1939                         if (spec [MONO_INST_CLOB] == 's') {
1940                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1941                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1942                         }
1943                 } else {
1944                         ins->sreg2 = -1;
1945                 }
1946                 if (spec [MONO_INST_DEST]) {
1947                         if (spec [MONO_INST_DEST] == 'f') {
1948                                 reginfod = reginfof;
1949                                 if (fpcount >= MONO_MAX_FREGS) {
1950                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1951                                         fspill++;
1952                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1953                                         fpcount--;
1954                                 }
1955                                 fpcount++;
1956                         }
1957                         else
1958                                 reginfod = reginfo;
1959                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1960                                 reginfod [ins->dreg].killed_in = i;
1961                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1962                         reginfod [ins->dreg].last_use = i;
1963                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1964                                 reginfod [ins->dreg].born_in = i;
1965                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
1966                                 /* The virtual register is allocated sequentially */
1967                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1968                                 reginfod [ins->dreg + 1].last_use = i;
1969                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1970                                         reginfod [ins->dreg + 1].born_in = i;
1971
1972                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
1973                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
1974                         }
1975                 } else {
1976                         ins->dreg = -1;
1977                 }
1978
1979                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
1980                 ++i;
1981                 ins = ins->next;
1982         }
1983
1984         // todo: check if we have anything left on fp stack, in verify mode?
1985         fspill = 0;
1986
1987         DEBUG (print_regtrack (reginfo, rs->next_vireg));
1988         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
1989         tmp = reversed;
1990         while (tmp) {
1991                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
1992                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
1993                 --i;
1994                 ins = tmp->data;
1995                 spec = ins_spec [ins->opcode];
1996                 prev_dreg = -1;
1997                 clob_dreg = -1;
1998                 DEBUG (g_print ("processing:"));
1999                 DEBUG (print_ins (i, ins));
2000                 if (spec [MONO_INST_CLOB] == 's') {
2001                         /*
2002                          * Shift opcodes, SREG2 must be RCX
2003                          */
2004                         if (rs->ifree_mask & (1 << X86_ECX)) {
2005                                 if (ins->sreg2 < MONO_MAX_IREGS) {
2006                                         /* Argument already in hard reg, need to copy */
2007                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2008                                         insert_before_ins (ins, tmp, copy);
2009                                 }
2010                                 else {
2011                                         DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
2012                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2013                                 }
2014                         } else {
2015                                 int need_ecx_spill = TRUE;
2016                                 /* 
2017                                  * we first check if src1/dreg is already assigned a register
2018                                  * and then we force a spill of the var assigned to ECX.
2019                                  */
2020                                 /* the destination register can't be ECX */
2021                                 dest_mask &= ~ (1 << X86_ECX);
2022                                 src1_mask &= ~ (1 << X86_ECX);
2023                                 val = rs->iassign [ins->dreg];
2024                                 /* 
2025                                  * the destination register is already assigned to ECX:
2026                                  * we need to allocate another register for it and then
2027                                  * copy from this to ECX.
2028                                  */
2029                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
2030                                         int new_dest;
2031                                         new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2032                                         g_assert (new_dest >= 0);
2033                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
2034
2035                                         rs->isymbolic [new_dest] = ins->dreg;
2036                                         rs->iassign [ins->dreg] = new_dest;
2037                                         clob_dreg = ins->dreg;
2038                                         ins->dreg = new_dest;
2039                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
2040                                         need_ecx_spill = FALSE;
2041                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
2042                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
2043                                         rs->iassign [ins->dreg] = val;
2044                                         rs->isymbolic [val] = prev_dreg;
2045                                         ins->dreg = val;*/
2046                                 }
2047                                 if (is_global_ireg (ins->sreg2)) {
2048                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2049                                         insert_before_ins (ins, tmp, copy);
2050                                 }
2051                                 else {
2052                                         val = rs->iassign [ins->sreg2];
2053                                         if (val >= 0 && val != X86_ECX) {
2054                                                 MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
2055                                                 DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
2056                                                 move->next = ins;
2057                                                 g_assert_not_reached ();
2058                                                 /* FIXME: where is move connected to the instruction list? */
2059                                                 //tmp->prev->data->next = move;
2060                                         }
2061                                         else {
2062                                                 if (val == X86_ECX)
2063                                                 need_ecx_spill = FALSE;
2064                                         }
2065                                 }
2066                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
2067                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
2068                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
2069                                         mono_regstate_free_int (rs, X86_ECX);
2070                                 }
2071                                 if (!is_global_ireg (ins->sreg2))
2072                                         /* force-set sreg2 */
2073                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2074                         }
2075                         ins->sreg2 = X86_ECX;
2076                 } else if (spec [MONO_INST_CLOB] == 'd') {
2077                         /*
2078                          * DIVISION/REMAINER
2079                          */
2080                         int dest_reg = X86_EAX;
2081                         int clob_reg = X86_EDX;
2082                         if (spec [MONO_INST_DEST] == 'd') {
2083                                 dest_reg = X86_EDX; /* reminder */
2084                                 clob_reg = X86_EAX;
2085                         }
2086                         if (is_global_ireg (ins->dreg))
2087                                 val = ins->dreg;
2088                         else
2089                                 val = rs->iassign [ins->dreg];
2090                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
2091                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2092                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2093                                 mono_regstate_free_int (rs, dest_reg);
2094                         }
2095                         if (val < 0) {
2096                                 if (val < -1) {
2097                                         /* the register gets spilled after this inst */
2098                                         int spill = -val -1;
2099                                         dest_mask = 1 << dest_reg;
2100                                         prev_dreg = ins->dreg;
2101                                         val = mono_regstate_alloc_int (rs, dest_mask);
2102                                         if (val < 0)
2103                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
2104                                         rs->iassign [ins->dreg] = val;
2105                                         if (spill)
2106                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
2107                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2108                                         rs->isymbolic [val] = prev_dreg;
2109                                         ins->dreg = val;
2110                                 } else {
2111                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
2112                                         prev_dreg = ins->dreg;
2113                                         assign_ireg (rs, ins->dreg, dest_reg);
2114                                         ins->dreg = dest_reg;
2115                                         val = dest_reg;
2116                                 }
2117                         }
2118
2119                         //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
2120                         if (val != dest_reg) { /* force a copy */
2121                                 create_copy_ins (cfg, val, dest_reg, ins);
2122                                 if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
2123                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2124                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2125                                         mono_regstate_free_int (rs, dest_reg);
2126                                 }
2127                         }
2128                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
2129                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2130                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2131                                 mono_regstate_free_int (rs, clob_reg);
2132                         }
2133                         src1_mask = 1 << X86_EAX;
2134                         src2_mask = 1 << X86_ECX;
2135                 } else if (spec [MONO_INST_DEST] == 'l') {
2136                         int hreg;
2137                         val = rs->iassign [ins->dreg];
2138                         /* check special case when dreg have been moved from ecx (clob shift) */
2139                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2140                                 hreg = clob_dreg + 1;
2141                         else
2142                                 hreg = ins->dreg + 1;
2143
2144                         /* base prev_dreg on fixed hreg, handle clob case */
2145                         val = hreg - 1;
2146
2147                         if (val != rs->isymbolic [X86_EAX] && !(rs->ifree_mask & (1 << X86_EAX))) {
2148                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [X86_EAX]));
2149                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2150                                 mono_regstate_free_int (rs, X86_EAX);
2151                         }
2152                         if (hreg != rs->isymbolic [X86_EDX] && !(rs->ifree_mask & (1 << X86_EDX))) {
2153                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [X86_EDX]));
2154                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
2155                                 mono_regstate_free_int (rs, X86_EDX);
2156                         }
2157                 } else if (spec [MONO_INST_CLOB] == 'b') {
2158                         /*
2159                          * x86_set_reg instructions, dreg needs to be EAX..EDX
2160                          */     
2161                         dest_mask = (1 << X86_EAX) | (1 << X86_EBX) | (1 << X86_ECX) | (1 << X86_EDX);
2162                         if ((ins->dreg < MONO_MAX_IREGS) && (! (dest_mask & (1 << ins->dreg)))) {
2163                                 /* 
2164                                  * ins->dreg is already a hard reg, need to allocate another
2165                                  * suitable hard reg and make a copy.
2166                                  */
2167                                 int new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2168                                 g_assert (new_dest >= 0);
2169
2170                                 create_copy_ins (cfg, ins->dreg, new_dest, ins);
2171                                 DEBUG (g_print ("\tclob:b changing dreg R%d to %s\n", ins->dreg, mono_arch_regname (new_dest)));
2172                                 ins->dreg = new_dest;
2173
2174                                 /* The hard reg is no longer needed */
2175                                 mono_regstate_free_int (rs, new_dest);
2176                         }
2177                 }
2178
2179                 /*
2180                  * TRACK DREG
2181                  */
2182                 if (spec [MONO_INST_DEST] == 'f') {
2183                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
2184                                 GList *spill_node;
2185                                 MonoInst *store;
2186                                 spill_node = g_list_first (fspill_list);
2187                                 g_assert (spill_node);
2188
2189                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
2190                                 insert_before_ins (ins, tmp, store);
2191                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
2192                                 fspill--;
2193                         }
2194                 } else if (spec [MONO_INST_DEST] == 'L') {
2195                         int hreg;
2196                         val = rs->iassign [ins->dreg];
2197                         /* check special case when dreg have been moved from ecx (clob shift) */
2198                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2199                                 hreg = clob_dreg + 1;
2200                         else
2201                                 hreg = ins->dreg + 1;
2202
2203                         /* base prev_dreg on fixed hreg, handle clob case */
2204                         prev_dreg = hreg - 1;
2205
2206                         if (val < 0) {
2207                                 int spill = 0;
2208                                 if (val < -1) {
2209                                         /* the register gets spilled after this inst */
2210                                         spill = -val -1;
2211                                 }
2212                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2213                                 rs->iassign [ins->dreg] = val;
2214                                 if (spill)
2215                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2216                         }
2217
2218                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
2219  
2220                         rs->isymbolic [val] = hreg - 1;
2221                         ins->dreg = val;
2222                         
2223                         val = rs->iassign [hreg];
2224                         if (val < 0) {
2225                                 int spill = 0;
2226                                 if (val < -1) {
2227                                         /* the register gets spilled after this inst */
2228                                         spill = -val -1;
2229                                 }
2230                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2231                                 rs->iassign [hreg] = val;
2232                                 if (spill)
2233                                         create_spilled_store (cfg, spill, val, hreg, ins);
2234                         }
2235
2236                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
2237                         rs->isymbolic [val] = hreg;
2238                         /* save reg allocating into unused */
2239                         ins->unused = val;
2240
2241                         /* check if we can free our long reg */
2242                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2243                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
2244                                 mono_regstate_free_int (rs, val);
2245                         }
2246                 }
2247                 else if (ins->dreg >= MONO_MAX_IREGS) {
2248                         int hreg;
2249                         val = rs->iassign [ins->dreg];
2250                         if (spec [MONO_INST_DEST] == 'l') {
2251                                 /* check special case when dreg have been moved from ecx (clob shift) */
2252                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2253                                         hreg = clob_dreg + 1;
2254                                 else
2255                                         hreg = ins->dreg + 1;
2256
2257                                 /* base prev_dreg on fixed hreg, handle clob case */
2258                                 prev_dreg = hreg - 1;
2259                         } else
2260                                 prev_dreg = ins->dreg;
2261
2262                         if (val < 0) {
2263                                 int spill = 0;
2264                                 if (val < -1) {
2265                                         /* the register gets spilled after this inst */
2266                                         spill = -val -1;
2267                                 }
2268                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2269                                 rs->iassign [ins->dreg] = val;
2270                                 if (spill)
2271                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2272                         }
2273                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2274                         rs->isymbolic [val] = prev_dreg;
2275                         ins->dreg = val;
2276                         /* handle cases where lreg needs to be eax:edx */
2277                         if (spec [MONO_INST_DEST] == 'l') {
2278                                 /* check special case when dreg have been moved from ecx (clob shift) */
2279                                 int hreg = prev_dreg + 1;
2280                                 val = rs->iassign [hreg];
2281                                 if (val < 0) {
2282                                         int spill = 0;
2283                                         if (val < -1) {
2284                                                 /* the register gets spilled after this inst */
2285                                                 spill = -val -1;
2286                                         }
2287                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2288                                         rs->iassign [hreg] = val;
2289                                         if (spill)
2290                                                 create_spilled_store (cfg, spill, val, hreg, ins);
2291                                 }
2292                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
2293                                 rs->isymbolic [val] = hreg;
2294                                 if (ins->dreg == X86_EAX) {
2295                                         if (val != X86_EDX)
2296                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2297                                 } else if (ins->dreg == X86_EDX) {
2298                                         if (val == X86_EAX) {
2299                                                 /* swap */
2300                                                 g_assert_not_reached ();
2301                                         } else {
2302                                                 /* two forced copies */
2303                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2304                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2305                                         }
2306                                 } else {
2307                                         if (val == X86_EDX) {
2308                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2309                                         } else {
2310                                                 /* two forced copies */
2311                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2312                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2313                                         }
2314                                 }
2315                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2316                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
2317                                         mono_regstate_free_int (rs, val);
2318                                 }
2319                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
2320                                 /* this instruction only outputs to EAX, need to copy */
2321                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2322                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
2323                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
2324                         }
2325                 }
2326                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
2327                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
2328                         mono_regstate_free_int (rs, ins->dreg);
2329                 }
2330                 /* put src1 in EAX if it needs to be */
2331                 if (spec [MONO_INST_SRC1] == 'a') {
2332                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
2333                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
2334                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2335                                 mono_regstate_free_int (rs, X86_EAX);
2336                         }
2337                         if (ins->sreg1 < MONO_MAX_IREGS) {
2338                                 /* The argument is already in a hard reg, need to copy */
2339                                 MonoInst *copy = create_copy_ins (cfg, X86_EAX, ins->sreg1, NULL);
2340                                 insert_before_ins (ins, tmp, copy);
2341                         }
2342                         else
2343                                 /* force-set sreg1 */
2344                                 assign_ireg (rs, ins->sreg1, X86_EAX);
2345                         ins->sreg1 = X86_EAX;
2346                 }
2347
2348                 /*
2349                  * TRACK SREG1
2350                  */
2351                 if (spec [MONO_INST_SRC1] == 'f') {
2352                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
2353                                 MonoInst *load;
2354                                 MonoInst *store = NULL;
2355
2356                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2357                                         GList *spill_node;
2358                                         spill_node = g_list_first (fspill_list);
2359                                         g_assert (spill_node);
2360
2361                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
2362                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2363                                 }
2364
2365                                 fspill++;
2366                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2367                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
2368                                 insert_before_ins (ins, tmp, load);
2369                                 if (store) 
2370                                         insert_before_ins (load, tmp, store);
2371                         }
2372                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
2373                         /* force source to be same as dest */
2374                         assign_ireg (rs, ins->sreg1, ins->dreg);
2375                         assign_ireg (rs, ins->sreg1 + 1, ins->unused);
2376
2377                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
2378                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
2379
2380                         ins->sreg1 = ins->dreg;
2381                         /* 
2382                          * No need for saving the reg, we know that src1=dest in this cases
2383                          * ins->inst_c0 = ins->unused;
2384                          */
2385                 }
2386                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
2387                         val = rs->iassign [ins->sreg1];
2388                         prev_sreg1 = ins->sreg1;
2389                         if (val < 0) {
2390                                 int spill = 0;
2391                                 if (val < -1) {
2392                                         /* the register gets spilled after this inst */
2393                                         spill = -val -1;
2394                                 }
2395                                 if (0 && ins->opcode == OP_MOVE) {
2396                                         /* 
2397                                          * small optimization: the dest register is already allocated
2398                                          * but the src one is not: we can simply assign the same register
2399                                          * here and peephole will get rid of the instruction later.
2400                                          * This optimization may interfere with the clobbering handling:
2401                                          * it removes a mov operation that will be added again to handle clobbering.
                                         * There are also some other issues that show up with make testjit.
2403                                          */
2404                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2405                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2406                                         //g_assert (val >= 0);
2407                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2408                                 } else {
2409                                         //g_assert (val == -1); /* source cannot be spilled */
2410                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2411                                         rs->iassign [ins->sreg1] = val;
2412                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2413                                 }
2414                                 if (spill) {
2415                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2416                                         insert_before_ins (ins, tmp, store);
2417                                 }
2418                         }
2419                         rs->isymbolic [val] = prev_sreg1;
2420                         ins->sreg1 = val;
2421                 } else {
2422                         prev_sreg1 = -1;
2423                 }
2424                 /* handle clobbering of sreg1 */
2425                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2426                         MonoInst *sreg2_copy = NULL;
2427                         MonoInst *copy = NULL;
2428
2429                         if (ins->dreg == ins->sreg2) {
2430                                 /* 
2431                                  * copying sreg1 to dreg could clobber sreg2, so allocate a new
2432                                  * register for it.
2433                                  */
2434                                 int reg2 = 0;
2435
2436                                 reg2 = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->sreg2, 0);
2437
2438                                 DEBUG (g_print ("\tneed to copy sreg2 %s to reg %s\n", mono_arch_regname (ins->sreg2), mono_arch_regname (reg2)));
2439                                 sreg2_copy = create_copy_ins (cfg, reg2, ins->sreg2, NULL);
2440                                 prev_sreg2 = ins->sreg2 = reg2;
2441
2442                                 mono_regstate_free_int (rs, reg2);
2443                         }
2444
2445                         copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2446                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2447                         insert_before_ins (ins, tmp, copy);
2448
2449                         if (sreg2_copy)
2450                                 insert_before_ins (copy, tmp, sreg2_copy);
2451
2452                         /*
2453                          * Need to prevent sreg2 to be allocated to sreg1, since that
2454                          * would screw up the previous copy.
2455                          */
2456                         src2_mask &= ~ (1 << ins->sreg1);
2457                         /* we set sreg1 to dest as well */
2458                         prev_sreg1 = ins->sreg1 = ins->dreg;
2459                         src2_mask &= ~ (1 << ins->dreg);
2460                 }
2461
2462                 /*
2463                  * TRACK SREG2
2464                  */
2465                 if (spec [MONO_INST_SRC2] == 'f') {
2466                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2467                                 MonoInst *load;
2468                                 MonoInst *store = NULL;
2469
2470                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2471                                         GList *spill_node;
2472
2473                                         spill_node = g_list_first (fspill_list);
2474                                         g_assert (spill_node);
2475                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2476                                                 spill_node = g_list_next (spill_node);
2477         
2478                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2479                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2480                                 } 
2481                                 
2482                                 fspill++;
2483                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2484                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2485                                 insert_before_ins (ins, tmp, load);
2486                                 if (store) 
2487                                         insert_before_ins (load, tmp, store);
2488                         }
2489                 } 
2490                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2491                         val = rs->iassign [ins->sreg2];
2492                         prev_sreg2 = ins->sreg2;
2493                         if (val < 0) {
2494                                 int spill = 0;
2495                                 if (val < -1) {
2496                                         /* the register gets spilled after this inst */
2497                                         spill = -val -1;
2498                                 }
2499                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2500                                 rs->iassign [ins->sreg2] = val;
2501                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2502                                 if (spill)
2503                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2504                         }
2505                         rs->isymbolic [val] = prev_sreg2;
2506                         ins->sreg2 = val;
2507                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
2508                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
2509                         }
2510                 } else {
2511                         prev_sreg2 = -1;
2512                 }
2513
2514                 if (spec [MONO_INST_CLOB] == 'c') {
2515                         int j, s;
2516                         guint32 clob_mask = X86_CALLEE_REGS;
2517                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2518                                 s = 1 << j;
2519                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2520                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2521                                 }
2522                         }
2523                 }
2524                 if (spec [MONO_INST_CLOB] == 'a') {
2525                         guint32 clob_reg = X86_EAX;
2526                         if (!(rs->ifree_mask & (1 << clob_reg)) && (rs->isymbolic [clob_reg] >= 8)) {
2527                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2528                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2529                                 mono_regstate_free_int (rs, clob_reg);
2530                         }
2531                 }
2532                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2533                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2534                         mono_regstate_free_int (rs, ins->sreg1);
2535                 }
2536                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2537                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2538                         mono_regstate_free_int (rs, ins->sreg2);
2539                 }*/
2540         
2541                 //DEBUG (print_ins (i, ins));
2542                 /* this may result from a insert_before call */
2543                 if (!tmp->next)
2544                         bb->code = tmp->data;
2545                 tmp = tmp->next;
2546         }
2547
2548         g_free (reginfo);
2549         g_free (reginfof);
2550         g_list_free (fspill_list);
2551 }
2552
/*
 * emit_float_to_int:
 *
 *   Emit native code converting the value on top of the x87 fp stack to an
 * integer of SIZE bytes, leaving the result in DREG.  The x87 control word
 * is temporarily switched to round-toward-zero (RC bits = 11b), as CIL
 * conv.* opcodes require truncation, and restored afterwards.
 * DREG is used as a scratch register while patching the control word.
 * Returns the updated native code pointer.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
        /* Reserve a 4 byte slot: saved cw at [esp+0], modified cw at [esp+2] */
        x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
        /* Store the current fp control word at [esp+0] */
        x86_fnstcw_membase(code, X86_ESP, 0);
        x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
        /* Set rounding control to truncate (both RC bits) */
        x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
        x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
        /* Activate the truncating control word */
        x86_fldcw_membase (code, X86_ESP, 2);
        if (size == 8) {
                /* NOTE(review): only the low 4 bytes of the 8-byte result are
                 * popped here, so this path leaves 4 bytes on the stack —
                 * see the FIXME below; confirm callers never hit size == 8. */
                x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
                x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
                x86_pop_reg (code, dreg);
                /* FIXME: need the high register 
                 * x86_pop_reg (code, dreg_high);
                 */
        } else {
                x86_push_reg (code, X86_EAX); // SP = SP - 4
                x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
                x86_pop_reg (code, dreg);
        }
        /* Restore the caller's fp control word and release the slot */
        x86_fldcw_membase (code, X86_ESP, 0);
        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

        /* Sign/zero extend sub-word results up to 32 bits */
        if (size == 1)
                x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
        else if (size == 2)
                x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
        return code;
}
2583
/*
 * mono_emit_stack_alloc:
 *
 *   Emit native code implementing localloc: subtract the byte count held in
 * tree->sreg1 from ESP.  On Windows the allocation is performed one page
 * (0x1000 bytes) at a time with a touch of each page, so the guard page
 * mechanism can commit stack memory as it is reached.  If the instruction
 * carries MONO_INST_INIT, the allocated area is zero-filled with rep stosl.
 * Returns the updated native code pointer.
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
        int sreg = tree->sreg1;
#ifdef PLATFORM_WIN32
        guint8* br[5];

        /*
         * Under Windows:
         * If requested stack size is larger than one page,
         * perform stack-touch operation
         */
        /*
         * Generate stack probe code.
         * Under Windows, it is necessary to allocate one page at a time,
         * "touching" stack after each successful sub-allocation. This is
         * because of the way stack growth is implemented - there is a
         * guard page before the lowest stack page that is currently commited.
         * Stack normally grows sequentially so OS traps access to the
         * guard page and commits more pages when needed.
         */
        /* Sizes within a single page skip the probing loop entirely */
        x86_test_reg_imm (code, sreg, ~0xFFF);
        br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

        br[2] = code; /* loop */
        /* Allocate one page and touch it to trigger the guard page */
        x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
        x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
        /* Loop while at least one full page remains to be allocated */
        x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
        x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
        br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
        x86_patch (br[3], br[2]);
        /* Allocate the sub-page remainder, if any */
        x86_test_reg_reg (code, sreg, sreg);
        br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
        x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

        br[1] = code; x86_jump8 (code, 0);

        /* Fast path: request fits in one page, allocate it in one step */
        x86_patch (br[0], code);
        x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
        x86_patch (br[1], code);
        x86_patch (br[4], code);
#else /* PLATFORM_WIN32 */
        /* Non-Windows platforms need no probing, a single sub suffices */
        x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
#endif
        if (tree->flags & MONO_INST_INIT) {
                /* Zero the allocated block with rep stosl, saving EAX/ECX/EDI
                 * around it unless they hold the result or the size */
                int offset = 0;
                if (tree->dreg != X86_EAX && sreg != X86_EAX) {
                        x86_push_reg (code, X86_EAX);
                        offset += 4;
                }
                if (tree->dreg != X86_ECX && sreg != X86_ECX) {
                        x86_push_reg (code, X86_ECX);
                        offset += 4;
                }
                if (tree->dreg != X86_EDI && sreg != X86_EDI) {
                        x86_push_reg (code, X86_EDI);
                        offset += 4;
                }
                
                /* ECX = size in 4-byte words, EAX = 0 (the fill value) */
                x86_shift_reg_imm (code, X86_SHR, sreg, 2);
                if (sreg != X86_ECX)
                        x86_mov_reg_reg (code, X86_ECX, sreg, 4);
                x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
                                
                /* EDI = start of the allocated area (above the saved regs) */
                x86_lea_membase (code, X86_EDI, X86_ESP, offset);
                x86_cld (code);
                x86_prefix (code, X86_REP_PREFIX);
                x86_stosl (code);
                
                if (tree->dreg != X86_EDI && sreg != X86_EDI)
                        x86_pop_reg (code, X86_EDI);
                if (tree->dreg != X86_ECX && sreg != X86_ECX)
                        x86_pop_reg (code, X86_ECX);
                if (tree->dreg != X86_EAX && sreg != X86_EAX)
                        x86_pop_reg (code, X86_EAX);
        }
        return code;
}
2662
2663
2664 static guint8*
2665 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2666 {
2667         CallInfo *cinfo;
2668         int quad;
2669
2670         /* Move return value to the target register */
2671         switch (ins->opcode) {
2672         case CEE_CALL:
2673         case OP_CALL_REG:
2674         case OP_CALL_MEMBASE:
2675                 if (ins->dreg != X86_EAX)
2676                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2677                 break;
2678         case OP_VCALL:
2679         case OP_VCALL_REG:
2680         case OP_VCALL_MEMBASE:
2681                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
2682                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2683                         /* Pop the destination address from the stack */
2684                         x86_pop_reg (code, X86_ECX);
2685                         
2686                         for (quad = 0; quad < 2; quad ++) {
2687                                 switch (cinfo->ret.pair_storage [quad]) {
2688                                 case ArgInIReg:
2689                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
2690                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
2691                                         break;
2692                                 case ArgNone:
2693                                         break;
2694                                 default:
2695                                         g_assert_not_reached ();
2696                                 }
2697                         }
2698                 }
2699                 g_free (cinfo);
2700         default:
2701                 break;
2702         }
2703
2704         return code;
2705 }
2706
/*
 * REAL_PRINT_REG: debugging aid which emits native code printing TEXT
 * together with the number and runtime value of hard register REG.
 * The caller-saved registers (EAX, EDX, ECX) are preserved around the
 * emitted printf () call, and the three pushed arguments are popped
 * with a single ADD.  Not AOT-safe: the address of printf is embedded
 * directly in the generated code.  Wrapped in do { } while (0) so the
 * macro expands to a single statement and is safe inside unbraced ifs.
 */
#define REAL_PRINT_REG(text,reg) \
do { \
	mono_assert (reg >= 0); \
	x86_push_reg (code, X86_EAX); \
	x86_push_reg (code, X86_EDX); \
	x86_push_reg (code, X86_ECX); \
	x86_push_reg (code, reg); \
	x86_push_imm (code, reg); \
	x86_push_imm (code, text " %d %p\n"); \
	x86_mov_reg_imm (code, X86_EAX, printf); \
	x86_call_reg (code, X86_EAX); \
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
	x86_pop_reg (code, X86_ECX); \
	x86_pop_reg (code, X86_EDX); \
	x86_pop_reg (code, X86_EAX); \
} while (0)
2721
/* Alignment (in bytes) for the first instruction of a loop body.
 * The best value is CPU dependent; benchmark and tune per cpu. */
#define LOOP_ALIGNMENT 8
/* True for a bb which starts a loop body and has a non-zero nesting depth */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2725
2726 void
2727 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2728 {
2729         MonoInst *ins;
2730         MonoCallInst *call;
2731         guint offset;
2732         guint8 *code = cfg->native_code + cfg->code_len;
2733         MonoInst *last_ins = NULL;
2734         guint last_offset = 0;
2735         int max_len, cpos;
2736
2737         if (cfg->opt & MONO_OPT_PEEPHOLE)
2738                 peephole_pass (cfg, bb);
2739
2740         if (cfg->opt & MONO_OPT_LOOP) {
2741                 int pad, align = LOOP_ALIGNMENT;
2742                 /* set alignment depending on cpu */
2743                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2744                         pad = align - pad;
2745                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2746                         x86_padding (code, pad);
2747                         cfg->code_len += pad;
2748                         bb->native_offset = cfg->code_len;
2749                 }
2750         }
2751
2752         if (cfg->verbose_level > 2)
2753                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2754
2755         cpos = bb->max_offset;
2756
2757         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2758                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2759                 g_assert (!cfg->compile_aot);
2760                 cpos += 6;
2761
2762                 cov->data [bb->dfn].cil_code = bb->cil_code;
2763                 /* this is not thread save, but good enough */
2764                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2765         }
2766
2767         offset = code - cfg->native_code;
2768
2769         ins = bb->code;
2770         while (ins) {
2771                 offset = code - cfg->native_code;
2772
2773                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2774
2775                 if (offset > (cfg->code_size - max_len - 16)) {
2776                         cfg->code_size *= 2;
2777                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2778                         code = cfg->native_code + offset;
2779                         mono_jit_stats.code_reallocs++;
2780                 }
2781
2782                 mono_debug_record_line_number (cfg, ins, offset);
2783
2784                 switch (ins->opcode) {
2785                 case OP_BIGMUL:
2786                         x86_mul_reg (code, ins->sreg2, TRUE);
2787                         break;
2788                 case OP_BIGMUL_UN:
2789                         x86_mul_reg (code, ins->sreg2, FALSE);
2790                         break;
2791                 case OP_X86_SETEQ_MEMBASE:
2792                 case OP_X86_SETNE_MEMBASE:
2793                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2794                                          ins->inst_basereg, ins->inst_offset, TRUE);
2795                         break;
2796                 case OP_STOREI1_MEMBASE_IMM:
2797                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2798                         break;
2799                 case OP_STOREI2_MEMBASE_IMM:
2800                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2801                         break;
2802                 case OP_STORE_MEMBASE_IMM:
2803                 case OP_STOREI4_MEMBASE_IMM:
2804                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2805                         break;
2806                 case OP_STOREI1_MEMBASE_REG:
2807                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2808                         break;
2809                 case OP_STOREI2_MEMBASE_REG:
2810                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2811                         break;
2812                 case OP_STORE_MEMBASE_REG:
2813                 case OP_STOREI4_MEMBASE_REG:
2814                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2815                         break;
2816                 case CEE_LDIND_I:
2817                 case CEE_LDIND_I4:
2818                 case CEE_LDIND_U4:
2819                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2820                         break;
2821                 case OP_LOADU4_MEM:
2822                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2823                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2824                         break;
2825                 case OP_LOAD_MEMBASE:
2826                 case OP_LOADI4_MEMBASE:
2827                 case OP_LOADU4_MEMBASE:
2828                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2829                         break;
2830                 case OP_LOADU1_MEMBASE:
2831                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2832                         break;
2833                 case OP_LOADI1_MEMBASE:
2834                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2835                         break;
2836                 case OP_LOADU2_MEMBASE:
2837                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2838                         break;
2839                 case OP_LOADI2_MEMBASE:
2840                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2841                         break;
2842                 case CEE_CONV_I1:
2843                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2844                         break;
2845                 case CEE_CONV_I2:
2846                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2847                         break;
2848                 case CEE_CONV_U1:
2849                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2850                         break;
2851                 case CEE_CONV_U2:
2852                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2853                         break;
2854                 case OP_COMPARE:
2855                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2856                         break;
2857                 case OP_COMPARE_IMM:
2858                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2859                         break;
2860                 case OP_X86_COMPARE_MEMBASE_REG:
2861                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2862                         break;
2863                 case OP_X86_COMPARE_MEMBASE_IMM:
2864                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2865                         break;
2866                 case OP_X86_COMPARE_MEMBASE8_IMM:
2867                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2868                         break;
2869                 case OP_X86_COMPARE_REG_MEMBASE:
2870                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2871                         break;
2872                 case OP_X86_COMPARE_MEM_IMM:
2873                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2874                         break;
2875                 case OP_X86_TEST_NULL:
2876                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2877                         break;
2878                 case OP_X86_ADD_MEMBASE_IMM:
2879                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2880                         break;
2881                 case OP_X86_ADD_MEMBASE:
2882                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2883                         break;
2884                 case OP_X86_SUB_MEMBASE_IMM:
2885                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2886                         break;
2887                 case OP_X86_SUB_MEMBASE:
2888                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2889                         break;
2890                 case OP_X86_INC_MEMBASE:
2891                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2892                         break;
2893                 case OP_X86_INC_REG:
2894                         x86_inc_reg (code, ins->dreg);
2895                         break;
2896                 case OP_X86_DEC_MEMBASE:
2897                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2898                         break;
2899                 case OP_X86_DEC_REG:
2900                         x86_dec_reg (code, ins->dreg);
2901                         break;
2902                 case OP_X86_MUL_MEMBASE:
2903                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2904                         break;
2905                 case CEE_BREAK:
2906                         x86_breakpoint (code);
2907                         break;
2908                 case OP_ADDCC:
2909                 case CEE_ADD:
2910                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2911                         break;
2912                 case OP_ADC:
2913                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2914                         break;
2915                 case OP_ADDCC_IMM:
2916                 case OP_ADD_IMM:
2917                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2918                         break;
2919                 case OP_ADC_IMM:
2920                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2921                         break;
2922                 case OP_SUBCC:
2923                 case CEE_SUB:
2924                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2925                         break;
2926                 case OP_SBB:
2927                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2928                         break;
2929                 case OP_SUBCC_IMM:
2930                 case OP_SUB_IMM:
2931                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2932                         break;
2933                 case OP_SBB_IMM:
2934                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2935                         break;
2936                 case CEE_AND:
2937                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2938                         break;
2939                 case OP_AND_IMM:
2940                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2941                         break;
2942                 case CEE_DIV:
2943                         x86_cdq (code);
2944                         x86_div_reg (code, ins->sreg2, TRUE);
2945                         break;
2946                 case CEE_DIV_UN:
2947                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2948                         x86_div_reg (code, ins->sreg2, FALSE);
2949                         break;
2950                 case OP_DIV_IMM:
2951                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2952                         x86_cdq (code);
2953                         x86_div_reg (code, ins->sreg2, TRUE);
2954                         break;
2955                 case CEE_REM:
2956                         x86_cdq (code);
2957                         x86_div_reg (code, ins->sreg2, TRUE);
2958                         break;
2959                 case CEE_REM_UN:
2960                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2961                         x86_div_reg (code, ins->sreg2, FALSE);
2962                         break;
2963                 case OP_REM_IMM:
2964                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2965                         x86_cdq (code);
2966                         x86_div_reg (code, ins->sreg2, TRUE);
2967                         break;
2968                 case CEE_OR:
2969                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2970                         break;
2971                 case OP_OR_IMM:
2972                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2973                         break;
2974                 case CEE_XOR:
2975                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2976                         break;
2977                 case OP_XOR_IMM:
2978                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2979                         break;
2980                 case CEE_SHL:
2981                         g_assert (ins->sreg2 == X86_ECX);
2982                         x86_shift_reg (code, X86_SHL, ins->dreg);
2983                         break;
2984                 case CEE_SHR:
2985                         g_assert (ins->sreg2 == X86_ECX);
2986                         x86_shift_reg (code, X86_SAR, ins->dreg);
2987                         break;
2988                 case OP_SHR_IMM:
2989                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2990                         break;
2991                 case OP_SHR_UN_IMM:
2992                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2993                         break;
2994                 case CEE_SHR_UN:
2995                         g_assert (ins->sreg2 == X86_ECX);
2996                         x86_shift_reg (code, X86_SHR, ins->dreg);
2997                         break;
2998                 case OP_SHL_IMM:
2999                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
3000                         break;
3001                 case OP_LSHL: {
3002                         guint8 *jump_to_end;
3003
3004                         /* handle shifts below 32 bits */
3005                         x86_shld_reg (code, ins->unused, ins->sreg1);
3006                         x86_shift_reg (code, X86_SHL, ins->sreg1);
3007
3008                         x86_test_reg_imm (code, X86_ECX, 32);
3009                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3010
3011                         /* handle shift over 32 bit */
3012                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3013                         x86_clear_reg (code, ins->sreg1);
3014                         
3015                         x86_patch (jump_to_end, code);
3016                         }
3017                         break;
3018                 case OP_LSHR: {
3019                         guint8 *jump_to_end;
3020
3021                         /* handle shifts below 32 bits */
3022                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3023                         x86_shift_reg (code, X86_SAR, ins->unused);
3024
3025                         x86_test_reg_imm (code, X86_ECX, 32);
3026                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3027
3028                         /* handle shifts over 31 bits */
3029                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3030                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
3031                         
3032                         x86_patch (jump_to_end, code);
3033                         }
3034                         break;
3035                 case OP_LSHR_UN: {
3036                         guint8 *jump_to_end;
3037
3038                         /* handle shifts below 32 bits */
3039                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3040                         x86_shift_reg (code, X86_SHR, ins->unused);
3041
3042                         x86_test_reg_imm (code, X86_ECX, 32);
3043                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3044
3045                         /* handle shifts over 31 bits */
3046                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3047                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
3048                         
3049                         x86_patch (jump_to_end, code);
3050                         }
3051                         break;
3052                 case OP_LSHL_IMM:
3053                         if (ins->inst_imm >= 32) {
3054                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3055                                 x86_clear_reg (code, ins->sreg1);
3056                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
3057                         } else {
3058                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
3059                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
3060                         }
3061                         break;
3062                 case OP_LSHR_IMM:
3063                         if (ins->inst_imm >= 32) {
3064                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
3065                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
3066                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
3067                         } else {
3068                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3069                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
3070                         }
3071                         break;
3072                 case OP_LSHR_UN_IMM:
3073                         if (ins->inst_imm >= 32) {
3074                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3075                                 x86_clear_reg (code, ins->unused);
3076                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
3077                         } else {
3078                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3079                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
3080                         }
3081                         break;
3082                 case CEE_NOT:
3083                         x86_not_reg (code, ins->sreg1);
3084                         break;
3085                 case CEE_NEG:
3086                         x86_neg_reg (code, ins->sreg1);
3087                         break;
3088                 case OP_SEXT_I1:
3089                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3090                         break;
3091                 case OP_SEXT_I2:
3092                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3093                         break;
3094                 case CEE_MUL:
3095                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3096                         break;
3097                 case OP_MUL_IMM:
3098                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
3099                         break;
3100                 case CEE_MUL_OVF:
3101                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3102                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3103                         break;
3104                 case CEE_MUL_OVF_UN: {
3105                         /* the mul operation and the exception check should most likely be split */
3106                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3107                         /*g_assert (ins->sreg2 == X86_EAX);
3108                         g_assert (ins->dreg == X86_EAX);*/
3109                         if (ins->sreg2 == X86_EAX) {
3110                                 non_eax_reg = ins->sreg1;
3111                         } else if (ins->sreg1 == X86_EAX) {
3112                                 non_eax_reg = ins->sreg2;
3113                         } else {
3114                                 /* no need to save since we're going to store to it anyway */
3115                                 if (ins->dreg != X86_EAX) {
3116                                         saved_eax = TRUE;
3117                                         x86_push_reg (code, X86_EAX);
3118                                 }
3119                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
3120                                 non_eax_reg = ins->sreg2;
3121                         }
3122                         if (ins->dreg == X86_EDX) {
3123                                 if (!saved_eax) {
3124                                         saved_eax = TRUE;
3125                                         x86_push_reg (code, X86_EAX);
3126                                 }
3127                         } else if (ins->dreg != X86_EAX) {
3128                                 saved_edx = TRUE;
3129                                 x86_push_reg (code, X86_EDX);
3130                         }
3131                         x86_mul_reg (code, non_eax_reg, FALSE);
3132                         /* save before the check since pop and mov don't change the flags */
3133                         if (ins->dreg != X86_EAX)
3134                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3135                         if (saved_edx)
3136                                 x86_pop_reg (code, X86_EDX);
3137                         if (saved_eax)
3138                                 x86_pop_reg (code, X86_EAX);
3139                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3140                         break;
3141                 }
3142                 case OP_ICONST:
3143                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
3144                         break;
3145                 case OP_AOTCONST:
3146                         g_assert_not_reached ();
3147                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
3148                         x86_mov_reg_imm (code, ins->dreg, 0);
3149                         break;
3150                 case OP_LOAD_GOTADDR:
3151                         x86_call_imm (code, 0);
3152                         /* 
3153                          * The patch needs to point to the pop, since the GOT offset needs 
3154                          * to be added to that address.
3155                          */
3156                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3157                         x86_pop_reg (code, ins->dreg);
3158                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
3159                         break;
3160                 case OP_GOT_ENTRY:
3161                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3162                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
3163                         break;
3164                 case OP_X86_PUSH_GOT_ENTRY:
3165                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3166                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
3167                         break;
3168                 case CEE_CONV_I4:
3169                 case OP_MOVE:
3170                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3171                         break;
3172                 case CEE_CONV_U4:
3173                         g_assert_not_reached ();
3174                 case CEE_JMP: {
3175                         /*
3176                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3177                          * Keep in sync with the code in emit_epilog.
3178                          */
3179                         int pos = 0;
3180
3181                         /* FIXME: no tracing support... */
3182                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3183                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3184                         /* reset offset to make max_len work */
3185                         offset = code - cfg->native_code;
3186
3187                         g_assert (!cfg->method->save_lmf);
3188
3189                         if (cfg->used_int_regs & (1 << X86_EBX))
3190                                 pos -= 4;
3191                         if (cfg->used_int_regs & (1 << X86_EDI))
3192                                 pos -= 4;
3193                         if (cfg->used_int_regs & (1 << X86_ESI))
3194                                 pos -= 4;
3195                         if (pos)
3196                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3197         
3198                         if (cfg->used_int_regs & (1 << X86_ESI))
3199                                 x86_pop_reg (code, X86_ESI);
3200                         if (cfg->used_int_regs & (1 << X86_EDI))
3201                                 x86_pop_reg (code, X86_EDI);
3202                         if (cfg->used_int_regs & (1 << X86_EBX))
3203                                 x86_pop_reg (code, X86_EBX);
3204         
3205                         /* restore ESP/EBP */
3206                         x86_leave (code);
3207                         offset = code - cfg->native_code;
3208                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3209                         x86_jump32 (code, 0);
3210                         break;
3211                 }
3212                 case OP_CHECK_THIS:
3213                         /* ensure ins->sreg1 is not NULL
3214                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
3215                          * cmp DWORD PTR [eax], 0
3216                          */
3217                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
3218                         break;
3219                 case OP_ARGLIST: {
3220                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
3221                         x86_push_reg (code, hreg);
3222                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
3223                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
3224                         x86_pop_reg (code, hreg);
3225                         break;
3226                 }
3227                 case OP_FCALL:
3228                 case OP_LCALL:
3229                 case OP_VCALL:
3230                 case OP_VOIDCALL:
3231                 case CEE_CALL:
3232                         call = (MonoCallInst*)ins;
3233                         if (ins->flags & MONO_INST_HAS_METHOD)
3234                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
3235                         else
3236                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
3237                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
3238                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
3239                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
3240                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
3241                                  * smart enough to do that optimization yet
3242                                  *
3243                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
3244                                  * mcs botstrap slow down. However, doing 1 pop for 4 bytes creates a small,
3245                                  * (most likely from locality benefits). People with other processors should
3246                                  * check on theirs to see what happens.
3247                                  */
3248                                 if (call->stack_usage == 4) {
3249                                         /* we want to use registers that won't get used soon, so use
3250                                          * ecx, as eax will get allocated first. edx is used by long calls,
3251                                          * so we can't use that.
3252                                          */
3253                                         
3254                                         x86_pop_reg (code, X86_ECX);
3255                                 } else {
3256                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3257                                 }
3258                         }
3259                         code = emit_move_return_value (cfg, ins, code);
3260                         break;
		case OP_FCALL_REG:
		case OP_LCALL_REG:
		case OP_VCALL_REG:
		case OP_VOIDCALL_REG:
		case OP_CALL_REG:
			/* Indirect call through the address held in sreg1. */
			call = (MonoCallInst*)ins;
			x86_call_reg (code, ins->sreg1);
			/* Caller-clean conventions: pop the outgoing arguments.  A single
			 * 4-byte pop into ECX replaces "add $4,%esp" (see the size/locality
			 * note earlier in this function); ECX is used because EAX holds
			 * return values and EDX is used by long returns. */
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				if (call->stack_usage == 4)
					x86_pop_reg (code, X86_ECX);
				else
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_MEMBASE:
		case OP_LCALL_MEMBASE:
		case OP_VCALL_MEMBASE:
		case OP_VOIDCALL_MEMBASE:
		case OP_CALL_MEMBASE:
			/* Indirect call through [sreg1 + inst_offset] (e.g. a vtable slot). */
			call = (MonoCallInst*)ins;
			x86_call_membase (code, ins->sreg1, ins->inst_offset);
			/* Same caller-clean stack cleanup as the _REG variants above. */
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				if (call->stack_usage == 4)
					x86_pop_reg (code, X86_ECX);
				else
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_OUTARG:
		case OP_X86_PUSH:
			/* Push a register as an outgoing call argument. */
			x86_push_reg (code, ins->sreg1);
			break;
		case OP_X86_PUSH_IMM:
			x86_push_imm (code, ins->inst_imm);
			break;
		case OP_X86_PUSH_MEMBASE:
			x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_PUSH_OBJ: 
			/* Push a block of inst_imm bytes (a valuetype) located at
			 * [inst_basereg + inst_offset] onto the stack using rep movsd.
			 * The copy is done in 4-byte units (inst_imm >> 2), so inst_imm is
			 * presumably already rounded up to a multiple of 4 — TODO confirm. */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
			/* save the scratch registers that rep movsd clobbers */
			x86_push_reg (code, X86_EDI);
			x86_push_reg (code, X86_ESI);
			x86_push_reg (code, X86_ECX);
			/* ESI = source address */
			if (ins->inst_offset)
				x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
			else
				x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
			/* EDI = destination: the reserved area just above the 3 saved regs */
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_movsd (code);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_ESI);
			x86_pop_reg (code, X86_EDI);
			break;
		case OP_X86_LEA:
			x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
			break;
		case OP_X86_LEA_MEMBASE:
			x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
			break;
		case OP_X86_XCHG:
			x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_LOCALLOC:
			/* keep alignment: round the requested size up to the frame alignment */
			x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
			x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
			code = mono_emit_stack_alloc (code, ins);
			/* the result is the new stack pointer */
			x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
			break;
		case CEE_RET:
			x86_ret (code);
			break;
		case CEE_THROW: {
			/* push the exception object and call into the runtime throw helper */
			x86_push_reg (code, ins->sreg1);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
							  (gpointer)"mono_arch_throw_exception");
			break;
		}
		case OP_RETHROW: {
			x86_push_reg (code, ins->sreg1);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
							  (gpointer)"mono_arch_rethrow_exception");
			break;
		}
		case OP_CALL_HANDLER: 
			/* the call target (the handler's basic block) is patched in later */
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
			x86_call_imm (code, 0);
			break;
		case OP_LABEL:
			/* record this label's native offset for later branches to it */
			ins->inst_c0 = code - cfg->native_code;
			break;
		case CEE_BR:
			//g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
			//if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
			//break;
			if (ins->flags & MONO_INST_BRLABEL) {
				/* branch whose target is a label instruction */
				if (ins->inst_i0->inst_c0) {
					/* target already emitted: jump to it directly */
					x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
				} else {
					/* forward branch: emit a placeholder and record a patch */
					mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
					/* use the short (imm8) form when the estimated distance fits */
					if ((cfg->opt & MONO_OPT_BRANCH) &&
					    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
						x86_jump8 (code, 0);
					else 
						x86_jump32 (code, 0);
				}
			} else {
				/* branch whose target is a basic block */
				if (ins->inst_target_bb->native_offset) {
					x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
				} else {
					mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
					if ((cfg->opt & MONO_OPT_BRANCH) &&
					    x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
						x86_jump8 (code, 0);
					else 
						x86_jump32 (code, 0);
				} 
			}
			break;
		case OP_BR_REG:
			x86_jump_reg (code, ins->sreg1);
			break;
		case OP_CEQ:
			/* materialize a compare result as 0/1: setcc into the low byte,
			 * then zero-extend it to the full register */
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_CLT:
			x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_CLT_UN:
			/* the FALSE argument selects the unsigned form of the condition */
			x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_CGT:
			x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_CGT_UN:
			x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_CNE:
			x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_COND_EXC_EQ:
		case OP_COND_EXC_NE_UN:
		case OP_COND_EXC_LT:
		case OP_COND_EXC_LT_UN:
		case OP_COND_EXC_GT:
		case OP_COND_EXC_GT_UN:
		case OP_COND_EXC_GE:
		case OP_COND_EXC_GE_UN:
		case OP_COND_EXC_LE:
		case OP_COND_EXC_LE_UN:
		case OP_COND_EXC_OV:
		case OP_COND_EXC_NO:
		case OP_COND_EXC_C:
		case OP_COND_EXC_NC:
			/* conditionally branch to code throwing the exception named by
			 * inst_p1; the condition and its signedness are derived from the
			 * opcode's offset in branch_cc_table */
			EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
						    (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
			break;
		case CEE_BEQ:
		case CEE_BNE_UN:
		case CEE_BLT:
		case CEE_BLT_UN:
		case CEE_BGT:
		case CEE_BGT_UN:
		case CEE_BGE:
		case CEE_BGE_UN:
		case CEE_BLE:
		case CEE_BLE_UN:
			/* conditional branch; signedness derived from opcode ordering */
			EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
			break;
3441
		/* floating point opcodes */
		case OP_R8CONST: {
			double d = *(double *)ins->inst_p0;

			/* use the dedicated x87 instructions for +0.0 and 1.0; the
			 * signbit check keeps -0.0 out of the fldz fast path */
			if ((d == 0.0) && (mono_signbit (d) == 0)) {
				x86_fldz (code);
			} else if (d == 1.0) {
				x86_fld1 (code);
			} else {
				if (cfg->compile_aot) {
					/* AOT: materialize the raw bits on the stack instead of
					 * referencing runtime memory */
					guint32 *val = (guint32*)&d;
					x86_push_imm (code, val [1]);
					x86_push_imm (code, val [0]);
					x86_fld_membase (code, X86_ESP, 0, TRUE);
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
				}
				else {
					/* JIT: fld from the constant's address, patched in later */
					mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
					x86_fld (code, NULL, TRUE);
				}
			}
			break;
		}
		case OP_R4CONST: {
			float f = *(float *)ins->inst_p0;

			/* same scheme as OP_R8CONST, with a 4-byte payload */
			if ((f == 0.0) && (mono_signbit (f) == 0)) {
				x86_fldz (code);
			} else if (f == 1.0) {
				x86_fld1 (code);
			} else {
				if (cfg->compile_aot) {
					guint32 val = *(guint32*)&f;
					x86_push_imm (code, val);
					x86_fld_membase (code, X86_ESP, 0, FALSE);
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
				}
				else {
					mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
					x86_fld (code, NULL, FALSE);
				}
			}
			break;
		}
		case OP_STORER8_MEMBASE_REG:
			/* store ST(0) as r8; the final TRUE pops the fp stack */
			x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
			break;
		case OP_LOADR8_SPILL_MEMBASE:
			/* reload a spilled r8 and place it below the current ST(0) */
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			x86_fxch (code, 1);
			break;
		case OP_LOADR8_MEMBASE:
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_STORER4_MEMBASE_REG:
			x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
			break;
		case OP_LOADR4_MEMBASE:
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
			break;
		case CEE_CONV_R4: /* FIXME: change precision */
		case CEE_CONV_R8:
			/* int -> float: push the value and fild it back off the stack */
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, FALSE);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			break;
		case OP_X86_FP_LOAD_I8:
			x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_X86_FP_LOAD_I4:
			x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
			break;
		case OP_FCONV_TO_I1:
			code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
			break;
		case OP_FCONV_TO_U1:
			code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
			break;
		case OP_FCONV_TO_I2:
			code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
			break;
		case OP_FCONV_TO_U2:
			code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
			break;
		case OP_FCONV_TO_I4:
		case OP_FCONV_TO_I:
			code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
			break;
		case OP_FCONV_TO_I8:
			/* x87 fist rounds with the current rounding mode while the CIL
			 * conversion truncates, so temporarily OR the rounding-control
			 * bits (0xc00 = round toward zero) into the FPU control word
			 * around the fistp */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
			x86_fnstcw_membase(code, X86_ESP, 0);
			x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
			x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
			x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
			x86_fldcw_membase (code, X86_ESP, 2);
			/* make room for the 64-bit result */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
			/* low word -> dreg, other word -> ins->unused (presumably the
			 * high-word result register — verify against register allocation) */
			x86_pop_reg (code, ins->dreg);
			x86_pop_reg (code, ins->unused);
			/* restore the saved control word */
			x86_fldcw_membase (code, X86_ESP, 0);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			break;
		case OP_LCONV_TO_R_UN: { 
			/* mn is the 80-bit extended-precision constant 2^64 (little-endian:
			 * significand 0x8000000000000000, exponent 0x403f); it corrects the
			 * signed fild result when the i8's top bit is set */
			static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
			guint8 *br;

			/* load 64bit integer to FP stack */
			x86_push_imm (code, 0);
			x86_push_reg (code, ins->sreg2);
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, TRUE);
			/* store as 80bit FP value */
			x86_fst80_membase (code, X86_ESP, 0);
			
			/* test if lreg is negative (sreg2 holds the high word) */
			x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
			br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
	
			/* add correction constant mn (+ 2^64) */
			x86_fld80_mem (code, mn);
			x86_fld80_membase (code, X86_ESP, 0);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			x86_fst80_membase (code, X86_ESP, 0);

			x86_patch (br, code);

			x86_fld80_membase (code, X86_ESP, 0);
			/* release the 12 bytes pushed above (8-byte value + extra word) */
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);

			break;
		}
		case OP_LCONV_TO_OVF_I: {
			guint8 *br [3], *label [1];

			/* 
			 * Valid ints: 0xffffffff:80000000 to 00000000:0x7fffffff
			 */
			x86_test_reg_reg (code, ins->sreg1, ins->sreg1);

			/* If the low word top bit is set, see if we are negative */
			br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
			/* We are not negative (no top bit set), check for our top word to be zero */
			x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
			br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
			label [0] = code;

			/* throw exception */
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
			x86_jump32 (code, 0);
	
			x86_patch (br [0], code);
			/* our top bit is set, check that top word is 0xffffffff */
			x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
		
			/* note: the NE branch below reuses flags set either by the cmp
			 * just above, or (when arriving via br[1], where ZF was set by
			 * the test) falls through harmlessly */
			x86_patch (br [1], code);
			/* nope, emit exception */
			br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
			x86_patch (br [2], label [0]);

			if (ins->dreg != ins->sreg1)
				x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
			break;
		}
		case OP_FADD:
			/* ST(1) = ST(1) op ST(0); the TRUE argument pops ST(0) */
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;
		case OP_FSUB:
			x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
			break;		
		case OP_FMUL:
			x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
			break;		
		case OP_FDIV:
			x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
			break;		
		case OP_FNEG:
			x86_fchs (code);
			break;		
		case OP_SIN:
			x86_fsin (code);
			/* the fldz/fadd pair after the trig op presumably forces the
			 * result back to the expected precision — TODO confirm; the same
			 * pattern is used by the other trig arms */
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_COS:
			x86_fcos (code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_ABS:
			x86_fabs (code);
			break;		
		case OP_TAN: {
			/* 
			 * it really doesn't make sense to inline all this code,
			 * it's here just to show that things may not be as simple 
			 * as they appear.
			 */
			guchar *check_pos, *end_tan, *pop_jump;
			/* fnstsw/fstsw write the FPU status word to AX, so preserve EAX */
			x86_push_reg (code, X86_EAX);
			x86_fptan (code);
			x86_fnstsw (code);
			/* C2 set means the operand was outside fptan's range and needs
			 * argument reduction */
			x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
			check_pos = code;
			x86_branch8 (code, X86_CC_NE, 0, FALSE);
			x86_fstp (code, 0); /* pop the 1.0 */
			end_tan = code;
			x86_jump8 (code, 0);
			/* out-of-range path: build 2*pi (fldpi + fadd) and reduce the
			 * argument with fprem1 before retrying fptan */
			x86_fldpi (code);
			x86_fp_op (code, X86_FADD, 0);
			x86_fxch (code, 1);
			x86_fprem1 (code);
			x86_fstsw (code);
			x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
			pop_jump = code;
			x86_branch8 (code, X86_CC_NE, 0, FALSE);
			x86_fstp (code, 1);
			x86_fptan (code);
			x86_patch (pop_jump, code);
			x86_fstp (code, 0); /* pop the 1.0 */
			x86_patch (check_pos, code);
			x86_patch (end_tan, code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			x86_pop_reg (code, X86_EAX);
			break;
		}
		case OP_ATAN:
			/* fpatan computes atan(ST(1)/ST(0)); with 1.0 in ST(0) this is
			 * atan(x) */
			x86_fld1 (code);
			x86_fpatan (code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_SQRT:
			x86_fsqrt (code);
			break;		
		case OP_X86_FPOP:
			x86_fstp (code, 0);
			break;		
		case OP_FREM: {
			guint8 *l1, *l2;

			/* fnstsw writes to AX, so save EAX around the loop */
			x86_push_reg (code, X86_EAX);
			/* we need to exchange ST(0) with ST(1) */
			x86_fxch (code, 1);

			/* this requires a loop, because fprem sometimes 
			 * returns a partial remainder (status bit C2 set) */
			l1 = code;
			/* looks like MS is using fprem instead of the IEEE compatible fprem1 */
			/* x86_fprem1 (code); */
			x86_fprem (code);
			x86_fnstsw (code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
			/* l2 points past the 2-byte branch so the backward displacement
			 * l1 - l2 is relative to the end of the branch instruction */
			l2 = code + 2;
			x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);

			/* pop result */
			x86_fstp (code, 1);

			x86_pop_reg (code, X86_EAX);
			break;
		}
		case OP_FCOMPARE:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* fcomip compares ST(0)/ST(1) straight into EFLAGS and pops */
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				break;
			}
			/* this overwrites EAX */
			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			break;
		case OP_FCEQ:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				guchar *unordered_check;
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				/* PF is set by fcomip when the operands are unordered (NaN):
				 * skip the setcc and leave dreg = 0 in that case */
				unordered_check = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
				x86_patch (unordered_check, code);
				break;
			}
			/* non-FCMOV path computes the status word in EAX; preserve the
			 * caller's EAX if it is not the destination */
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			/* 0x4000 is the C3 ("equal") bit of the FPU status word */
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FCLT:
		case OP_FCLT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				if (ins->opcode == OP_FCLT_UN) {
					/* clt.un must yield 1 for unordered operands: on PF,
					 * skip the setcc and force dreg to 1 instead */
					guchar *unordered_check = code;
					guchar *jump_to_end;
					x86_branch8 (code, X86_CC_P, 0, FALSE);
					x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
					jump_to_end = code;
					x86_jump8 (code, 0);
					x86_patch (unordered_check, code);
					x86_inc_reg (code, ins->dreg);
					x86_patch (jump_to_end, code);
				} else {
					/* GT here encodes "<" — presumably due to the operand
					 * order on the fp stack; TODO confirm */
					x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
				}
				break;
			}
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			if (ins->opcode == OP_FCLT_UN) {
				/* additionally accept the unordered status pattern
				 * (all condition bits set) */
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

				x86_patch (end_jump, code);
			}
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FCGT:
		case OP_FCGT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				guchar *unordered_check;
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				if (ins->opcode == OP_FCGT) {
					/* cgt must yield 0 for unordered operands: skip the
					 * setcc when PF (unordered) is set */
					unordered_check = code;
					x86_branch8 (code, X86_CC_P, 0, FALSE);
					x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
					x86_patch (unordered_check, code);
				} else {
					/* cgt.un: no unordered guard — the unsigned LT setcc
					 * presumably already yields 1 on unordered (CF set by
					 * fcomip); TODO confirm */
					x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
				}
				break;
			}
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			/* C0 set means ST(0) < source, i.e. the "greater than" pattern
			 * for the operand order used here */
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			if (ins->opcode == OP_FCGT_UN) {
				/* additionally accept the unordered status pattern
				 * (all condition bits set) */
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
	
				x86_patch (end_jump, code);
			}
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FBEQ:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* skip the branch entirely when the comparison was
				 * unordered (PF set) */
				guchar *jump = code;
				x86_branch8 (code, X86_CC_P, 0, TRUE);
				EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
				x86_patch (jump, code);
				break;
			}
			/* non-FCMOV: EAX presumably still holds the masked FPU status
			 * from a preceding OP_FCOMPARE — TODO confirm; 0x4000 is C3 */
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
			break;
		case OP_FBNE_UN:
			/* Branch if C013 != 100 */
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* branch if !ZF or (PF|CF) */
				EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
				EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
				break;
			}
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			break;
		case OP_FBLT:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* GT encodes "<" after fcomip — presumably due to the
				 * operand order on the fp stack; TODO confirm */
				EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
				break;
			}
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			break;
3860                 case OP_FBLT_UN:
3861                         if (cfg->opt & MONO_OPT_FCMOV) {
3862                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3863                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3864                                 break;
3865                         }
3866                         if (ins->opcode == OP_FBLT_UN) {
3867                                 guchar *is_not_zero_check, *end_jump;
3868                                 is_not_zero_check = code;
3869                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3870                                 end_jump = code;
3871                                 x86_jump8 (code, 0);
3872                                 x86_patch (is_not_zero_check, code);
3873                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3874
3875                                 x86_patch (end_jump, code);
3876                         }
3877                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3878                         break;
3879                 case OP_FBGT:
3880                 case OP_FBGT_UN:
3881                         if (cfg->opt & MONO_OPT_FCMOV) {
3882                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3883                                 break;
3884                         }
3885                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3886                         if (ins->opcode == OP_FBGT_UN) {
3887                                 guchar *is_not_zero_check, *end_jump;
3888                                 is_not_zero_check = code;
3889                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3890                                 end_jump = code;
3891                                 x86_jump8 (code, 0);
3892                                 x86_patch (is_not_zero_check, code);
3893                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3894
3895                                 x86_patch (end_jump, code);
3896                         }
3897                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3898                         break;
3899                 case OP_FBGE:
3900                         /* Branch if C013 == 100 or 001 */
3901                         if (cfg->opt & MONO_OPT_FCMOV) {
3902                                 guchar *br1;
3903
3904                                 /* skip branch if C1=1 */
3905                                 br1 = code;
3906                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3907                                 /* branch if (C0 | C3) = 1 */
3908                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3909                                 x86_patch (br1, code);
3910                                 break;
3911                         }
3912                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3913                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3914                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3915                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3916                         break;
3917                 case OP_FBGE_UN:
3918                         /* Branch if C013 == 000 */
3919                         if (cfg->opt & MONO_OPT_FCMOV) {
3920                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3921                                 break;
3922                         }
3923                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3924                         break;
3925                 case OP_FBLE:
3926                         /* Branch if C013=000 or 100 */
3927                         if (cfg->opt & MONO_OPT_FCMOV) {
3928                                 guchar *br1;
3929
3930                                 /* skip branch if C1=1 */
3931                                 br1 = code;
3932                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3933                                 /* branch if C0=0 */
3934                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3935                                 x86_patch (br1, code);
3936                                 break;
3937                         }
3938                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3939                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3940                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3941                         break;
3942                 case OP_FBLE_UN:
3943                         /* Branch if C013 != 001 */
3944                         if (cfg->opt & MONO_OPT_FCMOV) {
3945                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3946                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3947                                 break;
3948                         }
3949                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3950                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3951                         break;
3952                 case CEE_CKFINITE: {
3953                         x86_push_reg (code, X86_EAX);
3954                         x86_fxam (code);
3955                         x86_fnstsw (code);
3956                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3957                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3958                         x86_pop_reg (code, X86_EAX);
3959                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3960                         break;
3961                 }
3962                 case OP_TLS_GET: {
3963                         x86_prefix (code, X86_GS_PREFIX);
3964                         x86_mov_reg_mem (code, ins->dreg, ins->inst_offset, 4);                 
3965                         break;
3966                 }
3967                 case OP_ATOMIC_ADD_I4: {
3968                         int dreg = ins->dreg;
3969
3970                         if (dreg == ins->inst_basereg) {
3971                                 x86_push_reg (code, ins->sreg2);
3972                                 dreg = ins->sreg2;
3973                         } 
3974                         
3975                         if (dreg != ins->sreg2)
3976                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3977
3978                         x86_prefix (code, X86_LOCK_PREFIX);
3979                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3980
3981                         if (dreg != ins->dreg) {
3982                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3983                                 x86_pop_reg (code, dreg);
3984                         }
3985
3986                         break;
3987                 }
3988                 case OP_ATOMIC_ADD_NEW_I4: {
3989                         int dreg = ins->dreg;
3990
3991                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3992                         if (ins->sreg2 == dreg) {
3993                                 if (dreg == X86_EBX) {
3994                                         dreg = X86_EDI;
3995                                         if (ins->inst_basereg == X86_EDI)
3996                                                 dreg = X86_ESI;
3997                                 } else {
3998                                         dreg = X86_EBX;
3999                                         if (ins->inst_basereg == X86_EBX)
4000                                                 dreg = X86_EDI;
4001                                 }
4002                         } else if (ins->inst_basereg == dreg) {
4003                                 if (dreg == X86_EBX) {
4004                                         dreg = X86_EDI;
4005                                         if (ins->sreg2 == X86_EDI)
4006                                                 dreg = X86_ESI;
4007                                 } else {
4008                                         dreg = X86_EBX;
4009                                         if (ins->sreg2 == X86_EBX)
4010                                                 dreg = X86_EDI;
4011                                 }
4012                         }
4013
4014                         if (dreg != ins->dreg) {
4015                                 x86_push_reg (code, dreg);
4016                         }
4017
4018                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
4019                         x86_prefix (code, X86_LOCK_PREFIX);
4020                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4021                         /* dreg contains the old value, add with sreg2 value */
4022                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
4023                         
4024                         if (ins->dreg != dreg) {
4025                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4026                                 x86_pop_reg (code, dreg);
4027                         }
4028
4029                         break;
4030                 }
4031                 case OP_ATOMIC_EXCHANGE_I4: {
4032                         guchar *br[2];
4033                         int sreg2 = ins->sreg2;
4034                         int breg = ins->inst_basereg;
4035
4036                         /* cmpxchg uses eax as comperand, need to make sure we can use it
4037                          * hack to overcome limits in x86 reg allocator 
4038                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
4039                          */
4040                         if (ins->dreg != X86_EAX)
4041                                 x86_push_reg (code, X86_EAX);
4042                         
4043                         /* We need the EAX reg for the cmpxchg */
4044                         if (ins->sreg2 == X86_EAX) {
4045                                 x86_push_reg (code, X86_EDX);
4046                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
4047                                 sreg2 = X86_EDX;
4048                         }
4049
4050                         if (breg == X86_EAX) {
4051                                 x86_push_reg (code, X86_ESI);
4052                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
4053                                 breg = X86_ESI;
4054                         }
4055
4056                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
4057
4058                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
4059                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
4060                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
4061                         x86_patch (br [1], br [0]);
4062
4063                         if (breg != ins->inst_basereg)
4064                                 x86_pop_reg (code, X86_ESI);
4065
4066                         if (ins->dreg != X86_EAX) {
4067                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
4068                                 x86_pop_reg (code, X86_EAX);
4069                         }
4070
4071                         if (ins->sreg2 != sreg2)
4072                                 x86_pop_reg (code, X86_EDX);
4073
4074                         break;
4075                 }
4076                 default:
4077                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
4078                         g_assert_not_reached ();
4079                 }
4080
4081                 if ((code - cfg->native_code - offset) > max_len) {
4082                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4083                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4084                         g_assert_not_reached ();
4085                 }
4086                
4087                 cpos += max_len;
4088
4089                 last_ins = ins;
4090                 last_offset = offset;
4091                 
4092                 ins = ins->next;
4093         }
4094
4095         cfg->code_len = code - cfg->native_code;
4096 }
4097
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Hook for registering architecture specific low-level runtime calls.
 * Intentionally empty on x86: this backend has no extra low-level calls
 * to register.
 */
void
mono_arch_register_lowlevel_calls (void)
{
}
4102
/*
 * mono_arch_patch_code:
 *
 *   Walk the jump-info list JI and apply each relocation to the native code
 * buffer CODE of METHOD. Targets are resolved through
 * mono_resolve_patch_target (); RUN_CCTORS doubles as the "not compiling
 * AOT" flag: when it is FALSE we are emitting AOT code and only
 * branch-local patches (BB/LABEL) are applied here, everything else is
 * resolved at load time.
 */
void
mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
{
	MonoJumpInfo *patch_info;
	gboolean compile_aot = !run_cctors;

	for (patch_info = ji; patch_info; patch_info = patch_info->next) {
		/* patch_info->ip.i is an offset into the method's code buffer */
		unsigned char *ip = patch_info->ip.i + code;
		const unsigned char *target;

		target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);

		if (compile_aot) {
			switch (patch_info->type) {
			case MONO_PATCH_INFO_BB:
			case MONO_PATCH_INFO_LABEL:
				break;
			default:
				/* No need to patch these */
				continue;
			}
		}

		switch (patch_info->type) {
		case MONO_PATCH_INFO_IP:
			/* Store the absolute address of the patch site itself */
			*((gconstpointer *)(ip)) = target;
			break;
		case MONO_PATCH_INFO_CLASS_INIT: {
			guint8 *code = ip;
			/* Might already been changed to a nop */
			/* Re-emit the call opcode before patching its displacement */
			x86_call_code (code, 0);
			x86_patch (ip, target);
			break;
		}
		case MONO_PATCH_INFO_ABS:
		case MONO_PATCH_INFO_METHOD:
		case MONO_PATCH_INFO_METHOD_JUMP:
		case MONO_PATCH_INFO_INTERNAL_METHOD:
		case MONO_PATCH_INFO_BB:
		case MONO_PATCH_INFO_LABEL:
			/* Call/branch sites: rewrite the relative displacement */
			x86_patch (ip, target);
			break;
		case MONO_PATCH_INFO_NONE:
			/* Already handled (e.g. merged throw sequences) */
			break;
		default: {
			/* Immediate operand embedded in an instruction: the arch
			 * helper tells us where the 32-bit slot is relative to IP. */
			guint32 offset = mono_arch_get_patch_offset (ip);
			*((gconstpointer *)(ip + offset)) = target;
			break;
		}
		}
	}
}
4155
/*
 * mono_arch_emit_prolog:
 *
 *   Emit the method prolog into a freshly allocated native code buffer:
 * standard EBP frame setup, saving of either the full LMF (when
 * method->save_lmf) or just the used callee-saved registers, stack frame
 * allocation, branch-length estimation for short jumps, optional call
 * tracing, and loading of register-allocated arguments from the stack.
 * Returns the current emission pointer; cfg->native_code / cfg->code_len
 * are updated as a side effect.
 */
guint8 *
mono_arch_emit_prolog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoBasicBlock *bb;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int alloc_size, pos, max_offset, i;
	guint8 *code;

	/* Initial size guess; callers grow the buffer on demand */
	cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
	code = cfg->native_code = g_malloc (cfg->code_size);

	/* Standard frame: push ebp; mov ebp, esp */
	x86_push_reg (code, X86_EBP);
	x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);

	/* stack_offset is negative; alloc_size is the frame size in bytes */
	alloc_size = - cfg->stack_offset;
	pos = 0;

	if (method->save_lmf) {
		/* Build a MonoLMF structure on the stack by pushing its fields
		 * in reverse order. NOTE(review): the push sequence below must
		 * stay in sync with the MonoLMF layout and with the hard-coded
		 * EBP-relative offsets used in mono_arch_emit_epilog (). */
		pos += sizeof (MonoLMF);

		/* save the current IP */
		mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
		x86_push_imm_template (code);

		/* save all caller saved regs */
		x86_push_reg (code, X86_EBP);
		x86_push_reg (code, X86_ESI);
		x86_push_reg (code, X86_EDI);
		x86_push_reg (code, X86_EBX);

		/* save method info */
		x86_push_imm (code, method);

		/* get the address of lmf for the current thread */
		/* 
		 * This is performance critical so we try to use some tricks to make
		 * it fast.
		 */
		if (lmf_tls_offset != -1) {
			/* Load lmf quicky using the GS register */
			x86_prefix (code, X86_GS_PREFIX);
			x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
		}
		else {
			if (cfg->compile_aot) {
				/* The GOT var does not exist yet */
				/* call/pop pair materializes EIP, then the GOT offset
				 * is added and mono_get_lmf_addr is called indirectly
				 * through the GOT slot (0xf0f0f0f0 is a placeholder
				 * displacement fixed up by the patch info). */
				x86_call_imm (code, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
				x86_pop_reg (code, X86_EAX);
				x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
				x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
			}
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
		}

		/* push lmf */
		x86_push_reg (code, X86_EAX); 
		/* push *lfm (previous_lmf) */
		x86_push_membase (code, X86_EAX, 0);
		/* *(lmf) = ESP */
		/* Link this frame's LMF into the thread's LMF chain */
		x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
	} else {

		/* No LMF: just save the callee-saved registers this method uses.
		 * Push order (EBX, EDI, ESI) must match the pop order in the
		 * epilog and the CEE_JMP restore sequence. */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_push_reg (code, X86_EBX);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_push_reg (code, X86_EDI);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_push_reg (code, X86_ESI);
			pos += 4;
		}
	}

	/* The pushes above already consumed part of the frame */
	alloc_size -= pos;

	if (alloc_size) {
		/* See mono_emit_stack_alloc */
#ifdef PLATFORM_WIN32
		/* Windows requires touching each 4K page so the guard page
		 * mechanism can grow the stack */
		guint32 remaining_size = alloc_size;
		while (remaining_size >= 0x1000) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
			x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
			remaining_size -= 0x1000;
		}
		if (remaining_size)
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
#else
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
#endif
	}

	/* compute max_offset in order to use short forward jumps */
	/* Conservative upper bound on each bblock's start offset, using the
	 * per-opcode maximum lengths from the machine description. */
	max_offset = 0;
	if (cfg->opt & MONO_OPT_BRANCH) {
		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
			MonoInst *ins = bb->code;
			bb->max_offset = max_offset;

			if (cfg->prof_options & MONO_PROFILE_COVERAGE)
				max_offset += 6;
			/* max alignment for loops */
			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
				max_offset += LOOP_ALIGNMENT;

			while (ins) {
				if (ins->opcode == OP_LABEL)
					ins->inst_c1 = max_offset;
				
				max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
				ins = ins->next;
			}
		}
	}

	/* Optional enter-method tracing hook */
	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);

	/* load arguments allocated to register from the stack */
	sig = mono_method_signature (method);
	pos = 0;

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = cfg->varinfo [pos];
		if (inst->opcode == OP_REGVAR) {
			/* All args arrive on the stack on x86; copy into the
			 * register the allocator assigned */
			x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
			if (cfg->verbose_level > 2)
				g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
		}
		pos++;
	}

	cfg->code_len = code - cfg->native_code;

	return code;
}
4301
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the method epilog: optional leave-method tracing, unlinking of the
 * LMF (or restoring the callee-saved registers pushed by the prolog),
 * loading value-type return values into registers where the calling
 * convention requires it, then leave + ret. For stdcall (and vtype returns
 * passed via a hidden stack pointer) a ret imm is used so the callee pops
 * its own arguments.
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;
	
	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	/* Grow the code buffer until the worst-case epilog fits */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with CEE_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;

		/* Find a spare register */
		/* EDX holds half of a 64-bit return value, so use EDI instead
		 * when the method returns I8/U8 */
		switch (sig->ret->type) {
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
			prev_lmf_reg = X86_EDI;
			cfg->used_int_regs |= (1 << X86_EDI);
			break;
		default:
			prev_lmf_reg = X86_EDX;
			break;
		}

		/* NOTE(review): the fixed EBP-relative offsets below (-32, -28,
		 * -20, -16, -12) encode the MonoLMF stack layout created by the
		 * prolog push sequence — keep both in sync. */

		/* reg = previous_lmf */
		x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);

		/* ecx = lmf */
		x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);

		/* *(lmf) = previous_lmf */
		/* Unlink this frame from the thread's LMF chain */
		x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* Compute the negative offset of the saved-register area and
		 * point ESP at it so the pops below find the right slots.
		 * Pop order (ESI, EDI, EBX) is the reverse of the prolog pushes. */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		/* Small value types are returned in up to two 4-byte "quads",
		 * each in an integer register or on the x87 stack */
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	if (CALLCONV_IS_STDCALL (sig)) {
		/* stdcall: callee pops the arguments; compute their total size */
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		/* Pop the hidden vtype return-address argument */
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	g_free (cinfo);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
4439
4440 void
4441 mono_arch_emit_exceptions (MonoCompile *cfg)
4442 {
4443         MonoJumpInfo *patch_info;
4444         int nthrows, i;
4445         guint8 *code;
4446         MonoClass *exc_classes [16];
4447         guint8 *exc_throw_start [16], *exc_throw_end [16];
4448         guint32 code_size;
4449         int exc_count = 0;
4450
4451         /* Compute needed space */
4452         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4453                 if (patch_info->type == MONO_PATCH_INFO_EXC)
4454                         exc_count++;
4455         }
4456
4457         /* 
4458          * make sure we have enough space for exceptions
4459          * 16 is the size of two push_imm instructions and a call
4460          */
4461         if (cfg->compile_aot)
4462                 code_size = exc_count * 32;
4463         else
4464                 code_size = exc_count * 16;
4465
4466         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4467                 cfg->code_size *= 2;
4468                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4469                 mono_jit_stats.code_reallocs++;
4470         }
4471
4472         code = cfg->native_code + cfg->code_len;
4473
4474         nthrows = 0;
4475         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4476                 switch (patch_info->type) {
4477                 case MONO_PATCH_INFO_EXC: {
4478                         MonoClass *exc_class;
4479                         guint8 *buf, *buf2;
4480                         guint32 throw_ip;
4481
4482                         x86_patch (patch_info->ip.i + cfg->native_code, code);
4483
4484                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4485                         g_assert (exc_class);
4486                         throw_ip = patch_info->ip.i;
4487
4488                         /* Find a throw sequence for the same exception class */
4489                         for (i = 0; i < nthrows; ++i)
4490                                 if (exc_classes [i] == exc_class)
4491                                         break;
4492                         if (i < nthrows) {
4493                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4494                                 x86_jump_code (code, exc_throw_start [i]);
4495                                 patch_info->type = MONO_PATCH_INFO_NONE;
4496                         }
4497                         else {
4498                                 guint32 got_reg = X86_EAX;
4499                                 guint32 size;
4500
4501                                 /* Compute size of code following the push <OFFSET> */
4502                                 if (cfg->compile_aot) {
4503                                         size = 5 + 6;
4504                                         if (!cfg->got_var)
4505                                                 size += 32;
4506                                         else if (cfg->got_var->opcode == OP_REGOFFSET)
4507                                                 size += 6;
4508                                 }
4509                                 else
4510                                         size = 5 + 5;
4511
4512                                 if ((code - cfg->native_code) - throw_ip < 126 - size) {
4513                                         /* Use the shorter form */
4514                                         buf = buf2 = code;
4515                                         x86_push_imm (code, 0);
4516                                 }
4517                                 else {
4518                                         buf = code;
4519                                         x86_push_imm (code, 0xf0f0f0f0);
4520                                         buf2 = code;
4521                                 }
4522
4523                                 if (nthrows < 16) {
4524                                         exc_classes [nthrows] = exc_class;
4525                                         exc_throw_start [nthrows] = code;
4526                                 }
4527
4528                                 if (cfg->compile_aot) {          
4529                                         /*
4530                                          * Since the patches are generated by the back end, there is                                     * no way to generate a got_var at this point.   
4531                                          */
4532                                         if (!cfg->got_var) {
4533                                                 x86_call_imm (code, 0);
4534                                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
4535                                                 x86_pop_reg (code, X86_EAX);
4536                                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
4537                                         }
4538                                         else {
4539                                                 if (cfg->got_var->opcode == OP_REGOFFSET)
4540                                                         x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
4541                                                 else
4542                                                         got_reg = cfg->got_var->dreg;
4543                                         }
4544                                 }
4545
4546                                 x86_push_imm (code, exc_class->type_token);
4547                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
4548                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4549                                 patch_info->ip.i = code - cfg->native_code;
4550                                 if (cfg->compile_aot)
4551                                         x86_call_membase (code, got_reg, 0xf0f0f0f0);
4552                                 else
4553                                         x86_call_code (code, 0);
4554                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
4555                                 while (buf < buf2)
4556                                         x86_nop (buf);
4557
4558                                 if (nthrows < 16) {
4559                                         exc_throw_end [nthrows] = code;
4560                                         nthrows ++;
4561                                 }
4562                         }
4563                         break;
4564                 }
4565                 default:
4566                         /* do nothing */
4567                         break;
4568                 }
4569         }
4570
4571         cfg->code_len = code - cfg->native_code;
4572
4573         g_assert (cfg->code_len < cfg->code_size);
4574 }
4575
4576 void
4577 mono_arch_flush_icache (guint8 *code, gint size)
4578 {
4579         /* not needed */
4580 }
4581
/*
 * mono_arch_flush_register_windows:
 *
 *   Flush architectural register windows. x86 has no register windows
 * (that is a SPARC/IA-64 concept), so this is a no-op stub required by
 * the arch-independent JIT interface.
 */
void
mono_arch_flush_register_windows (void)
{
	/* no register windows on x86 */
}
4586
4587 /*
4588  * Support for fast access to the thread-local lmf structure using the GS
4589  * segment register on NPTL + kernel 2.6.x.
4590  */
4591
4592 static gboolean tls_offset_inited = FALSE;
4593
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Per-thread JIT TLS initialization. On the first call (process-wide) it
 * caches the TLS offsets used for fast lmf/appdomain/thread access, unless
 * disabled via the MONO_NO_TLS environment variable. When altstack SIGSEGV
 * handling is compiled in, it also queries the current thread's stack size
 * and installs an alternate signal stack for stack-overflow handling.
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
        pthread_t self = pthread_self();
        pthread_attr_t attr;
        void *staddr = NULL;
        size_t stsize = 0;
        struct sigaltstack sa;
#endif

        if (!tls_offset_inited) {
                /* probe the offsets only once per process */
                tls_offset_inited = TRUE;
                if (!getenv ("MONO_NO_TLS")) {
                        appdomain_tls_offset = mono_domain_get_tls_offset ();
                        lmf_tls_offset = mono_get_lmf_tls_offset ();
                        thread_tls_offset = mono_thread_get_tls_offset ();
                }
        }

#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK

        /* Determine stack boundaries */
        /* NOTE(review): presumably skipped under valgrind because its pthread
         * emulation breaks these attribute queries — confirm. */
        if (!mono_running_on_valgrind ()) {
#ifdef HAVE_PTHREAD_GETATTR_NP
                pthread_getattr_np( self, &attr );
#else
#ifdef HAVE_PTHREAD_ATTR_GET_NP
                pthread_attr_get_np( self, &attr );
#elif defined(sun)
                /* Solaris: cannot query a running thread's stack base; only the
                 * default stack size is available (staddr stays NULL here). */
                pthread_attr_init( &attr );
                pthread_attr_getstacksize( &attr, &stsize );
#else
#error "Not implemented"
#endif
#endif
#ifndef sun
                pthread_attr_getstack( &attr, &staddr, &stsize );
#endif
        }

        /* 
         * staddr seems to be wrong for the main thread, so we keep the value in
         * tls->end_of_stack
         */
        tls->stack_size = stsize;

        /* Setup an alternate signal stack */
        tls->signal_stack = g_malloc (SIGNAL_STACK_SIZE);
        tls->signal_stack_size = SIGNAL_STACK_SIZE;

        sa.ss_sp = tls->signal_stack;
        sa.ss_size = SIGNAL_STACK_SIZE;
        /* NOTE(review): POSIX specifies ss_flags = 0 (or SS_DISABLE) when
         * installing a stack; Linux treats SS_ONSTACK as equivalent to 0, but
         * other platforms may reject it — verify before porting. */
        sa.ss_flags = SS_ONSTACK;
        sigaltstack (&sa, NULL);
#endif
}
4651
4652 void
4653 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4654 {
4655 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4656         struct sigaltstack sa;
4657
4658         sa.ss_sp = tls->signal_stack;
4659         sa.ss_size = SIGNAL_STACK_SIZE;
4660         sa.ss_flags = SS_DISABLE;
4661         sigaltstack  (&sa, NULL);
4662
4663         if (tls->signal_stack)
4664                 g_free (tls->signal_stack);
4665 #endif
4666 }
4667
4668 void
4669 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4670 {
4671
4672         /* add the this argument */
4673         if (this_reg != -1) {
4674                 MonoInst *this;
4675                 MONO_INST_NEW (cfg, this, OP_OUTARG);
4676                 this->type = this_type;
4677                 this->sreg1 = this_reg;
4678                 mono_bblock_add_inst (cfg->cbb, this);
4679         }
4680
4681         if (vt_reg != -1) {
4682                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
4683                 MonoInst *vtarg;
4684
4685                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4686                         /*
4687                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4688                          * the stack. Save the address here, so the call instruction can
4689                          * access it.
4690                          */
4691                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4692                         vtarg->inst_destbasereg = X86_ESP;
4693                         vtarg->inst_offset = inst->stack_usage;
4694                         vtarg->sreg1 = vt_reg;
4695                         mono_bblock_add_inst (cfg->cbb, vtarg);
4696                 }
4697                 else {
4698                         MonoInst *vtarg;
4699                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4700                         vtarg->type = STACK_MP;
4701                         vtarg->sreg1 = vt_reg;
4702                         mono_bblock_add_inst (cfg->cbb, vtarg);
4703                 }
4704
4705                 g_free (cinfo);
4706         }
4707 }
4708
4709
4710 MonoInst*
4711 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4712 {
4713         MonoInst *ins = NULL;
4714
4715         if (cmethod->klass == mono_defaults.math_class) {
4716                 if (strcmp (cmethod->name, "Sin") == 0) {
4717                         MONO_INST_NEW (cfg, ins, OP_SIN);
4718                         ins->inst_i0 = args [0];
4719                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4720                         MONO_INST_NEW (cfg, ins, OP_COS);
4721                         ins->inst_i0 = args [0];
4722                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4723                         MONO_INST_NEW (cfg, ins, OP_TAN);
4724                         ins->inst_i0 = args [0];
4725                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4726                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4727                         ins->inst_i0 = args [0];
4728                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4729                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4730                         ins->inst_i0 = args [0];
4731                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4732                         MONO_INST_NEW (cfg, ins, OP_ABS);
4733                         ins->inst_i0 = args [0];
4734                 }
4735 #if 0
4736                 /* OP_FREM is not IEEE compatible */
4737                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4738                         MONO_INST_NEW (cfg, ins, OP_FREM);
4739                         ins->inst_i0 = args [0];
4740                         ins->inst_i1 = args [1];
4741                 }
4742 #endif
4743         } else if(cmethod->klass->image == mono_defaults.corlib &&
4744                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4745                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4746
4747                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4748                         MonoInst *ins_iconst;
4749
4750                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4751                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4752                         ins_iconst->inst_c0 = 1;
4753
4754                         ins->inst_i0 = args [0];
4755                         ins->inst_i1 = ins_iconst;
4756                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4757                         MonoInst *ins_iconst;
4758
4759                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4760                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4761                         ins_iconst->inst_c0 = -1;
4762
4763                         ins->inst_i0 = args [0];
4764                         ins->inst_i1 = ins_iconst;
4765                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4766                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4767
4768                         ins->inst_i0 = args [0];
4769                         ins->inst_i1 = args [1];
4770                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4771                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
4772
4773                         ins->inst_i0 = args [0];
4774                         ins->inst_i1 = args [1];
4775                 }
4776         }
4777
4778         return ins;
4779 }
4780
4781
4782 gboolean
4783 mono_arch_print_tree (MonoInst *tree, int arity)
4784 {
4785         return 0;
4786 }
4787
4788 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4789 {
4790         MonoInst* ins;
4791         
4792         if (appdomain_tls_offset == -1)
4793                 return NULL;
4794         
4795         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4796         ins->inst_offset = appdomain_tls_offset;
4797         return ins;
4798 }
4799
4800 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4801 {
4802         MonoInst* ins;
4803         
4804         if (thread_tls_offset == -1)
4805                 return NULL;
4806         
4807         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4808         ins->inst_offset = thread_tls_offset;
4809         return ins;
4810 }
4811
4812 guint32
4813 mono_arch_get_patch_offset (guint8 *code)
4814 {
4815         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4816                 return 2;
4817         else if ((code [0] == 0xba))
4818                 return 1;
4819         else if ((code [0] == 0x68))
4820                 /* push IMM */
4821                 return 1;
4822         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4823                 /* push <OFFSET>(<REG>) */
4824                 return 2;
4825         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4826                 /* call *<OFFSET>(<REG>) */
4827                 return 2;
4828         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4829                 /* fldl <ADDR> */
4830                 return 2;
4831         else if ((code [0] == 0x58) && (code [1] == 0x05))
4832                 /* pop %eax; add <OFFSET>, %eax */
4833                 return 2;
4834         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4835                 /* pop <REG>; add <OFFSET>, <REG> */
4836                 return 3;
4837         else {
4838                 g_assert_not_reached ();
4839                 return -1;
4840         }
4841 }