2005-02-05 Zoltan Varga <vargaz@freemail.hu>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #include <mono/metadata/appdomain.h>
16 #include <mono/metadata/debug-helpers.h>
17 #include <mono/metadata/threads.h>
18 #include <mono/metadata/profiler-private.h>
19 #include <mono/utils/mono-math.h>
20
21 #include "trace.h"
22 #include "mini-x86.h"
23 #include "inssel.h"
24 #include "cpu-pentium.h"
25
/* TLS offsets for fast per-thread data access; -1 means not yet
 * initialized / not available on this platform. */
static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

/* Round VAL up to the next multiple of ALIGN (ALIGN must be a power of two) */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif

#define SIGNAL_STACK_SIZE (64 * 1024)

#define NOT_IMPLEMENTED g_assert_not_reached ()
42
43 const char*
44 mono_arch_regname (int reg) {
45         switch (reg) {
46         case X86_EAX: return "%eax";
47         case X86_EBX: return "%ebx";
48         case X86_ECX: return "%ecx";
49         case X86_EDX: return "%edx";
50         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
51         case X86_EDI: return "%edi";
52         case X86_ESI: return "%esi";
53         }
54         return "unknown";
55 }
56
/* Where a call argument or return value lives. */
typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFpStack,
	ArgNone /* only in pair_storage */
} ArgStorage;

/* Placement of a single argument or of the return value. */
typedef struct {
	gint16 offset;		/* offset into the outgoing argument area (ArgOnStack) */
	gint8  reg;		/* register number for the register storages */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

/* Full description of a call site: the return value, the vararg
 * signature cookie, and one ArgInfo per actual argument.  Allocated
 * with trailing space so args [] can hold more than one entry (see
 * get_call_info ()). */
typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;
87
/* x86 passes all integer arguments on the stack (no parameter registers) */
#define PARAM_REGS 0

/* ... and all floating point arguments as well */
#define FLOAT_PARAM_REGS 0

/* empty: no registers are used for parameter passing (empty array is a
 * GCC extension) */
static X86_Reg_No param_regs [] = { };

#ifdef PLATFORM_WIN32
/* register pair used to return small structs from pinvoke calls on win32
 * (see add_valuetype ()) */
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
97
98 static void inline
99 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
100 {
101     ainfo->offset = *stack_size;
102
103     if (*gr >= PARAM_REGS) {
104                 ainfo->storage = ArgOnStack;
105                 (*stack_size) += sizeof (gpointer);
106     }
107     else {
108                 ainfo->storage = ArgInIReg;
109                 ainfo->reg = param_regs [*gr];
110                 (*gr) ++;
111     }
112 }
113
114 static void inline
115 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
116 {
117         ainfo->offset = *stack_size;
118
119         g_assert (PARAM_REGS == 0);
120         
121         ainfo->storage = ArgOnStack;
122         (*stack_size) += sizeof (gpointer) * 2;
123 }
124
125 static void inline
126 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
127 {
128     ainfo->offset = *stack_size;
129
130     if (*gr >= FLOAT_PARAM_REGS) {
131                 ainfo->storage = ArgOnStack;
132                 (*stack_size) += sizeof (gpointer);
133     }
134     else {
135                 /* A double register */
136                 if (is_double)
137                         ainfo->storage = ArgInDoubleSSEReg;
138                 else
139                         ainfo->storage = ArgInFloatSSEReg;
140                 ainfo->reg = *gr;
141                 (*gr) += 1;
142     }
143 }
144
145
/*
 * add_valuetype:
 *
 *   Compute the placement of a valuetype argument or return value into
 * AINFO.  The default is to pass it on the stack, rounded up to pointer
 * size; on win32, small pinvoke return values are returned in registers
 * or on the FP stack instead.  GR and FR are currently unused on x86
 * since no valuetypes are passed in registers here.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* native and managed layouts can differ, so pick the matching size */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgOnFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgOnFpStack;
			return;
		}
		/* structs of 1, 2, 4 or 8 bytes are returned in the EAX:EDX pair */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			ainfo->pair_storage [1] = ArgInIReg;
			ainfo->pair_regs [1] = return_regs [1];
			return;
		}
	}
#endif

	/* default: passed on the stack, rounded up to pointer size */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
195
196 /*
197  * get_call_info:
198  *
199  *  Obtain information about a call according to the calling convention.
200  * For x86 ELF, see the "System V Application Binary Interface Intel386 
201  * Architecture Processor Supplment, Fourth Edition" document for more
202  * information.
203  * For x86 win32, see ???.
204  */
205 static CallInfo*
206 get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
207 {
208         guint32 i, gr, fr;
209         MonoType *ret_type;
210         int n = sig->hasthis + sig->param_count;
211         guint32 stack_size = 0;
212         CallInfo *cinfo;
213
214         cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
215
216         gr = 0;
217         fr = 0;
218
219         /* return value */
220         {
221                 ret_type = mono_type_get_underlying_type (sig->ret);
222                 switch (ret_type->type) {
223                 case MONO_TYPE_BOOLEAN:
224                 case MONO_TYPE_I1:
225                 case MONO_TYPE_U1:
226                 case MONO_TYPE_I2:
227                 case MONO_TYPE_U2:
228                 case MONO_TYPE_CHAR:
229                 case MONO_TYPE_I4:
230                 case MONO_TYPE_U4:
231                 case MONO_TYPE_I:
232                 case MONO_TYPE_U:
233                 case MONO_TYPE_PTR:
234                 case MONO_TYPE_CLASS:
235                 case MONO_TYPE_OBJECT:
236                 case MONO_TYPE_SZARRAY:
237                 case MONO_TYPE_ARRAY:
238                 case MONO_TYPE_STRING:
239                         cinfo->ret.storage = ArgInIReg;
240                         cinfo->ret.reg = X86_EAX;
241                         break;
242                 case MONO_TYPE_U8:
243                 case MONO_TYPE_I8:
244                         cinfo->ret.storage = ArgInIReg;
245                         cinfo->ret.reg = X86_EAX;
246                         break;
247                 case MONO_TYPE_R4:
248                         cinfo->ret.storage = ArgOnFpStack;
249                         break;
250                 case MONO_TYPE_R8:
251                         cinfo->ret.storage = ArgOnFpStack;
252                         break;
253                 case MONO_TYPE_VALUETYPE: {
254                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
255
256                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
257                         if (cinfo->ret.storage == ArgOnStack)
258                                 /* The caller passes the address where the value is stored */
259                                 add_general (&gr, &stack_size, &cinfo->ret);
260                         break;
261                 }
262                 case MONO_TYPE_TYPEDBYREF:
263                         /* Same as a valuetype with size 24 */
264                         add_general (&gr, &stack_size, &cinfo->ret);
265                         ;
266                         break;
267                 case MONO_TYPE_VOID:
268                         break;
269                 default:
270                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
271                 }
272         }
273
274         /* this */
275         if (sig->hasthis)
276                 add_general (&gr, &stack_size, cinfo->args + 0);
277
278         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
279                 gr = PARAM_REGS;
280                 fr = FLOAT_PARAM_REGS;
281                 
282                 /* Emit the signature cookie just before the implicit arguments */
283                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
284         }
285
286         for (i = 0; i < sig->param_count; ++i) {
287                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
288                 MonoType *ptype;
289
290                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
291                         /* We allways pass the sig cookie on the stack for simplicity */
292                         /* 
293                          * Prevent implicit arguments + the sig cookie from being passed 
294                          * in registers.
295                          */
296                         gr = PARAM_REGS;
297                         fr = FLOAT_PARAM_REGS;
298
299                         /* Emit the signature cookie just before the implicit arguments */
300                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
301                 }
302
303                 if (sig->params [i]->byref) {
304                         add_general (&gr, &stack_size, ainfo);
305                         continue;
306                 }
307                 ptype = mono_type_get_underlying_type (sig->params [i]);
308                 switch (ptype->type) {
309                 case MONO_TYPE_BOOLEAN:
310                 case MONO_TYPE_I1:
311                 case MONO_TYPE_U1:
312                         add_general (&gr, &stack_size, ainfo);
313                         break;
314                 case MONO_TYPE_I2:
315                 case MONO_TYPE_U2:
316                 case MONO_TYPE_CHAR:
317                         add_general (&gr, &stack_size, ainfo);
318                         break;
319                 case MONO_TYPE_I4:
320                 case MONO_TYPE_U4:
321                         add_general (&gr, &stack_size, ainfo);
322                         break;
323                 case MONO_TYPE_I:
324                 case MONO_TYPE_U:
325                 case MONO_TYPE_PTR:
326                 case MONO_TYPE_CLASS:
327                 case MONO_TYPE_OBJECT:
328                 case MONO_TYPE_STRING:
329                 case MONO_TYPE_SZARRAY:
330                 case MONO_TYPE_ARRAY:
331                         add_general (&gr, &stack_size, ainfo);
332                         break;
333                 case MONO_TYPE_VALUETYPE:
334                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
335                         break;
336                 case MONO_TYPE_TYPEDBYREF:
337                         stack_size += sizeof (MonoTypedRef);
338                         ainfo->storage = ArgOnStack;
339                         break;
340                 case MONO_TYPE_U8:
341                 case MONO_TYPE_I8:
342                         add_general_pair (&gr, &stack_size, ainfo);
343                         break;
344                 case MONO_TYPE_R4:
345                         add_float (&fr, &stack_size, ainfo, FALSE);
346                         break;
347                 case MONO_TYPE_R8:
348                         add_float (&fr, &stack_size, ainfo, TRUE);
349                         break;
350                 default:
351                         g_assert_not_reached ();
352                 }
353         }
354
355         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
356                 gr = PARAM_REGS;
357                 fr = FLOAT_PARAM_REGS;
358                 
359                 /* Emit the signature cookie just before the implicit arguments */
360                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
361         }
362
363         cinfo->stack_usage = stack_size;
364         cinfo->reg_usage = gr;
365         cinfo->freg_usage = fr;
366         return cinfo;
367 }
368
369 /*
370  * mono_arch_get_argument_info:
371  * @csig:  a method signature
372  * @param_count: the number of parameters to consider
373  * @arg_info: an array to store the result infos
374  *
375  * Gathers information on parameters such as size, alignment and
376  * padding. arg_info should be large enought to hold param_count + 1 entries. 
377  *
378  * Returns the size of the activation frame.
379  */
380 int
381 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
382 {
383         int k, frame_size = 0;
384         int size, align, pad;
385         int offset = 8;
386
387         if (MONO_TYPE_ISSTRUCT (csig->ret)) { 
388                 frame_size += sizeof (gpointer);
389                 offset += 4;
390         }
391
392         arg_info [0].offset = offset;
393
394         if (csig->hasthis) {
395                 frame_size += sizeof (gpointer);
396                 offset += 4;
397         }
398
399         arg_info [0].size = frame_size;
400
401         for (k = 0; k < param_count; k++) {
402                 
403                 if (csig->pinvoke)
404                         size = mono_type_native_stack_size (csig->params [k], &align);
405                 else
406                         size = mono_type_stack_size (csig->params [k], &align);
407
408                 /* ignore alignment for now */
409                 align = 1;
410
411                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
412                 arg_info [k].pad = pad;
413                 frame_size += size;
414                 arg_info [k + 1].pad = 0;
415                 arg_info [k + 1].size = size;
416                 offset += pad;
417                 arg_info [k + 1].offset = offset;
418                 offset += size;
419         }
420
421         align = MONO_ARCH_FRAME_ALIGNMENT;
422         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
423         arg_info [k].pad = pad;
424
425         return frame_size;
426 }
427
/*
 * Pre-assembled machine code for a small cdecl helper:
 *   void cpuid_impl (int id, int *p_eax, int *p_ebx, int *p_ecx, int *p_edx)
 * It runs CPUID with EAX = id and stores the four result registers
 * through the pointer arguments.  Kept as data and copied into
 * executable memory at runtime (see cpuid ()).
 */
static const guchar cpuid_impl [] = {
	0x55,                           /* push   %ebp */
	0x89, 0xe5,                     /* mov    %esp,%ebp */
	0x53,                           /* push   %ebx */
	0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                     /* cpuid   */
	0x50,                           /* push   %eax */
	0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
	0x89, 0x18,                     /* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
	0x89, 0x08,                     /* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
	0x89, 0x10,                     /* mov    %edx,(%eax) */
	0x58,                           /* pop    %eax */
	0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
	0x89, 0x02,                     /* mov    %eax,(%edx) */
	0x5b,                           /* pop    %ebx */
	0xc9,                           /* leave   */
	0xc3,                           /* ret     */
};

/* Signature of the code blob above */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
450
/*
 * cpuid:
 *
 *   Execute the CPUID instruction with ID in EAX, storing the result
 * registers through the four pointers.  Returns 1 on success, 0 if the
 * processor does not support CPUID.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
	/* Probe for CPUID support: the ID bit (0x200000) in EFLAGS can only
	 * be toggled if the processor implements CPUID. */
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);

	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
495
/*
 * Initialize the cpu to execute managed code: set the x87 FPU control
 * word to double precision and initialize the trampoline machinery.
 */
void
mono_arch_cpu_init (void)
{
	guint16 fpcw;

	/* spec compliance requires running with double precision */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	/* NOTE(review): this re-reads the control word into fpcw but the
	 * value is never used afterwards — presumably a verification /
	 * debugging leftover; confirm before removing. */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));

	mono_x86_tramp_init ();
}
513
514 /*
515  * This function returns the optimizations supported on this cpu.
516  */
517 guint32
518 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
519 {
520         int eax, ebx, ecx, edx;
521         guint32 opts = 0;
522         
523         *exclude_mask = 0;
524         /* Feature Flags function, flags returned in EDX. */
525         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
526                 if (edx & (1 << 15)) {
527                         opts |= MONO_OPT_CMOV;
528                         if (edx & 1)
529                                 opts |= MONO_OPT_FCMOV;
530                         else
531                                 *exclude_mask |= MONO_OPT_FCMOV;
532                 } else
533                         *exclude_mask |= MONO_OPT_CMOV;
534         }
535         return opts;
536 }
537
538 /*
539  * Determine whenever the trap whose info is in SIGINFO is caused by
540  * integer overflow.
541  */
542 gboolean
543 mono_arch_is_int_overflow (void *sigctx, void *info)
544 {
545         struct sigcontext *ctx = (struct sigcontext*)sigctx;
546         guint8* ip;
547
548         ip = (guint8*)ctx->SC_EIP;
549
550         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
551                 gint32 reg;
552
553                 /* idiv REG */
554                 switch (x86_modrm_rm (ip [1])) {
555                 case X86_ECX:
556                         reg = ctx->SC_ECX;
557                         break;
558                 case X86_EBX:
559                         reg = ctx->SC_EBX;
560                         break;
561                 default:
562                         g_assert_not_reached ();
563                         reg = -1;
564                 }
565
566                 if (reg == -1)
567                         return TRUE;
568         }
569                         
570         return FALSE;
571 }
572
573 static gboolean
574 is_regsize_var (MonoType *t) {
575         if (t->byref)
576                 return TRUE;
577         switch (mono_type_get_underlying_type (t)->type) {
578         case MONO_TYPE_I4:
579         case MONO_TYPE_U4:
580         case MONO_TYPE_I:
581         case MONO_TYPE_U:
582         case MONO_TYPE_PTR:
583                 return TRUE;
584         case MONO_TYPE_OBJECT:
585         case MONO_TYPE_STRING:
586         case MONO_TYPE_CLASS:
587         case MONO_TYPE_SZARRAY:
588         case MONO_TYPE_ARRAY:
589                 return TRUE;
590         case MONO_TYPE_VALUETYPE:
591                 return FALSE;
592         }
593         return FALSE;
594 }
595
/*
 * mono_arch_get_allocatable_int_vars:
 *
 *   Return the list of variables in CFG that may be allocated to an
 * integer register: live locals/arguments of register size (or small
 * integer type) which are not dead, volatile or accessed indirectly.
 */
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		/* skip dead/volatile/indirect vars and anything that is not a
		 * plain local or argument */
		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we dont allocate I1 to registers because there is no simply way to sign extend 
		 * 8bit quantities in caller saved registers on x86 */
		if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
		    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
		    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	/* let mono_varlist_sort () order the candidates for the allocator */
	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
629
630 GList *
631 mono_arch_get_global_int_regs (MonoCompile *cfg)
632 {
633         GList *regs = NULL;
634
635         /* we can use 3 registers for global allocation */
636         regs = g_list_prepend (regs, (gpointer)X86_EBX);
637         regs = g_list_prepend (regs, (gpointer)X86_ESI);
638         regs = g_list_prepend (regs, (gpointer)X86_EDI);
639
640         return regs;
641 }
642
643 /*
644  * mono_arch_regalloc_cost:
645  *
646  *  Return the cost, in number of memory references, of the action of 
647  * allocating the variable VMV into a register during global register
648  * allocation.
649  */
650 guint32
651 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
652 {
653         MonoInst *ins = cfg->varinfo [vmv->idx];
654
655         if (cfg->method->save_lmf)
656                 /* The register is already saved */
657                 return (ins->opcode == OP_ARG) ? 1 : 0;
658         else
659                 /* push+pop+possible load if it is an argument */
660                 return (ins->opcode == OP_ARG) ? 3 : 2;
661 }
662  
/*
 * Set var information according to the calling convention. X86 version.
 * Arguments live above EBP (positive offsets), locals and saved
 * registers below it (negative offsets).
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *m)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset, curinst, size, align;
	gint32 *offsets;

	/* NOTE(review): header is fetched but not used in this function */
	header = mono_method_get_header (m->method);
	sig = mono_method_signature (m->method);

	/* incoming args start at EBP + 8 (saved EBP + return address) */
	offset = 8;
	curinst = 0;
	if (MONO_TYPE_ISSTRUCT (sig->ret)) {
		/* valuetype return: the hidden return-address arg comes first */
		m->ret->opcode = OP_REGOFFSET;
		m->ret->inst_basereg = X86_EBP;
		m->ret->inst_offset = offset;
		offset += sizeof (gpointer);
	} else {
		/* FIXME: handle long and FP values */
		switch (sig->ret->type) {
		case MONO_TYPE_VOID:
			break;
		default:
			/* scalar returns live in EAX */
			m->ret->opcode = OP_REGVAR;
			m->ret->inst_c0 = X86_EAX;
			break;
		}
	}
	if (sig->hasthis) {
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		offset += sizeof (gpointer);
		curinst++;
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		/* remember where the signature cookie lives */
		m->sig_cookie = offset;
		offset += sizeof (gpointer);
	}

	for (i = 0; i < sig->param_count; ++i) {
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		size = mono_type_size (sig->params [i], &align);
		/* round each argument slot up to 4 bytes */
		size += 4 - 1;
		size &= ~(4 - 1);
		offset += size;
		curinst++;
	}

	/* locals grow downwards from EBP; restart the offset at 0 */
	offset = 0;

	/* reserve space to save LMF and caller saved registers */

	if (m->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (m->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (m, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* align the base of the locals area */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = m->locals_start; i < m->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = m->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;

	/* round the whole frame up to the required alignment */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	/* change sign? */
	m->stack_offset = -offset;
}
772
773 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
774  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
775  */
776
777 /* 
778  * take the arguments and generate the arch-specific
779  * instructions to properly call the function in call.
780  * This includes pushing, moving arguments to the right register
781  * etc.
782  * Issue: who does the spilling if needed, and when?
783  */
784 MonoCallInst*
785 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
786         MonoInst *arg, *in;
787         MonoMethodSignature *sig;
788         int i, n, stack_size, type;
789         MonoType *ptype;
790         CallInfo *cinfo;
791
792         stack_size = 0;
793         /* add the vararg cookie before the non-implicit args */
794         if (call->signature->call_convention == MONO_CALL_VARARG) {
795                 MonoInst *sig_arg;
796                 /* FIXME: Add support for signature tokens to AOT */
797                 cfg->disable_aot = TRUE;
798                 MONO_INST_NEW (cfg, arg, OP_OUTARG);
799                 MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
800                 sig_arg->inst_p0 = call->signature;
801                 arg->inst_left = sig_arg;
802                 arg->type = STACK_PTR;
803                 /* prepend, so they get reversed */
804                 arg->next = call->out_args;
805                 call->out_args = arg;
806                 stack_size += sizeof (gpointer);
807         }
808         sig = call->signature;
809         n = sig->param_count + sig->hasthis;
810
811         cinfo = get_call_info (sig, FALSE);
812
813         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
814                 if (cinfo->ret.storage == ArgOnStack)
815                         stack_size += sizeof (gpointer);
816         }
817
818         for (i = 0; i < n; ++i) {
819                 if (is_virtual && i == 0) {
820                         /* the argument will be attached to the call instrucion */
821                         in = call->args [i];
822                         stack_size += 4;
823                 } else {
824                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
825                         in = call->args [i];
826                         arg->cil_code = in->cil_code;
827                         arg->inst_left = in;
828                         arg->type = in->type;
829                         /* prepend, so they get reversed */
830                         arg->next = call->out_args;
831                         call->out_args = arg;
832                         if (i >= sig->hasthis) {
833                                 MonoType *t = sig->params [i - sig->hasthis];
834                                 ptype = mono_type_get_underlying_type (t);
835                                 if (t->byref)
836                                         type = MONO_TYPE_U;
837                                 else
838                                         type = ptype->type;
839                                 /* FIXME: validate arguments... */
840                                 switch (type) {
841                                 case MONO_TYPE_I:
842                                 case MONO_TYPE_U:
843                                 case MONO_TYPE_BOOLEAN:
844                                 case MONO_TYPE_CHAR:
845                                 case MONO_TYPE_I1:
846                                 case MONO_TYPE_U1:
847                                 case MONO_TYPE_I2:
848                                 case MONO_TYPE_U2:
849                                 case MONO_TYPE_I4:
850                                 case MONO_TYPE_U4:
851                                 case MONO_TYPE_STRING:
852                                 case MONO_TYPE_CLASS:
853                                 case MONO_TYPE_OBJECT:
854                                 case MONO_TYPE_PTR:
855                                 case MONO_TYPE_FNPTR:
856                                 case MONO_TYPE_ARRAY:
857                                 case MONO_TYPE_SZARRAY:
858                                         stack_size += 4;
859                                         break;
860                                 case MONO_TYPE_I8:
861                                 case MONO_TYPE_U8:
862                                         stack_size += 8;
863                                         break;
864                                 case MONO_TYPE_R4:
865                                         stack_size += 4;
866                                         arg->opcode = OP_OUTARG_R4;
867                                         break;
868                                 case MONO_TYPE_R8:
869                                         stack_size += 8;
870                                         arg->opcode = OP_OUTARG_R8;
871                                         break;
872                                 case MONO_TYPE_VALUETYPE: {
873                                         int size;
874                                         if (sig->pinvoke) 
875                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
876                                         else 
877                                                 size = mono_type_stack_size (&in->klass->byval_arg, NULL);
878
879                                         stack_size += size;
880                                         arg->opcode = OP_OUTARG_VT;
881                                         arg->klass = in->klass;
882                                         arg->unused = sig->pinvoke;
883                                         arg->inst_imm = size; 
884                                         break;
885                                 }
886                                 case MONO_TYPE_TYPEDBYREF:
887                                         stack_size += sizeof (MonoTypedRef);
888                                         arg->opcode = OP_OUTARG_VT;
889                                         arg->klass = in->klass;
890                                         arg->unused = sig->pinvoke;
891                                         arg->inst_imm = sizeof (MonoTypedRef); 
892                                         break;
893                                 default:
894                                         g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
895                                 }
896                         } else {
897                                 /* the this argument */
898                                 stack_size += 4;
899                         }
900                 }
901         }
902
903         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
904                 if (cinfo->ret.storage == ArgValuetypeInReg) {
905                         MonoInst *zero_inst;
906                         /*
907                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
908                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
909                          * before calling the function. So we add a dummy instruction to represent pushing the 
910                          * struct return address to the stack. The return address will be saved to this stack slot 
911                          * by the code emitted in this_vret_args.
912                          */
913                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
914                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
915                         zero_inst->inst_p0 = 0;
916                         arg->inst_left = zero_inst;
917                         arg->type = STACK_PTR;
918                         /* prepend, so they get reversed */
919                         arg->next = call->out_args;
920                         call->out_args = arg;
921                 }
922                 else
923                         /* if the function returns a struct, the called method already does a ret $0x4 */
924                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
925                                 stack_size -= 4;
926         }
927
928         call->stack_usage = stack_size;
929         g_free (cinfo);
930
931         /* 
932          * should set more info in call, such as the stack space
933          * used by the args that needs to be added back to esp
934          */
935
936         return call;
937 }
938
939 /*
940  * Allow tracing to work with this interface (with an optional argument)
941  */
942 void*
943 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
944 {
945         guchar *code = p;
946
947         /* if some args are passed in registers, we need to save them here */
948         x86_push_reg (code, X86_EBP);
949
950         if (cfg->compile_aot) {
951                 x86_push_imm (code, cfg->method);
952                 x86_mov_reg_imm (code, X86_EAX, func);
953                 x86_call_reg (code, X86_EAX);
954         } else {
955                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
956                 x86_push_imm (code, cfg->method);
957                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
958                 x86_call_code (code, 0);
959         }
960         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
961
962         return code;
963 }
964
/*
 * Which return-value registers mono_arch_instrument_epilog has to preserve
 * around the call into the tracing function.
 */
enum {
	SAVE_NONE,	/* nothing to save (void return) */
	SAVE_STRUCT,	/* valuetype return: pass the hidden return-address arg */
	SAVE_EAX,	/* 32 bit integer/pointer result in %eax */
	SAVE_EAX_EDX,	/* 64 bit result in the %edx:%eax pair */
	SAVE_FP		/* floating point result on the x87 stack */
};
972
973 void*
974 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
975 {
976         guchar *code = p;
977         int arg_size = 0, save_mode = SAVE_NONE;
978         MonoMethod *method = cfg->method;
979         
980         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
981         case MONO_TYPE_VOID:
982                 /* special case string .ctor icall */
983                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
984                         save_mode = SAVE_EAX;
985                 else
986                         save_mode = SAVE_NONE;
987                 break;
988         case MONO_TYPE_I8:
989         case MONO_TYPE_U8:
990                 save_mode = SAVE_EAX_EDX;
991                 break;
992         case MONO_TYPE_R4:
993         case MONO_TYPE_R8:
994                 save_mode = SAVE_FP;
995                 break;
996         case MONO_TYPE_VALUETYPE:
997                 save_mode = SAVE_STRUCT;
998                 break;
999         default:
1000                 save_mode = SAVE_EAX;
1001                 break;
1002         }
1003
1004         switch (save_mode) {
1005         case SAVE_EAX_EDX:
1006                 x86_push_reg (code, X86_EDX);
1007                 x86_push_reg (code, X86_EAX);
1008                 if (enable_arguments) {
1009                         x86_push_reg (code, X86_EDX);
1010                         x86_push_reg (code, X86_EAX);
1011                         arg_size = 8;
1012                 }
1013                 break;
1014         case SAVE_EAX:
1015                 x86_push_reg (code, X86_EAX);
1016                 if (enable_arguments) {
1017                         x86_push_reg (code, X86_EAX);
1018                         arg_size = 4;
1019                 }
1020                 break;
1021         case SAVE_FP:
1022                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1023                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1024                 if (enable_arguments) {
1025                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1026                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1027                         arg_size = 8;
1028                 }
1029                 break;
1030         case SAVE_STRUCT:
1031                 if (enable_arguments) {
1032                         x86_push_membase (code, X86_EBP, 8);
1033                         arg_size = 4;
1034                 }
1035                 break;
1036         case SAVE_NONE:
1037         default:
1038                 break;
1039         }
1040
1041         if (cfg->compile_aot) {
1042                 x86_push_imm (code, method);
1043                 x86_mov_reg_imm (code, X86_EAX, func);
1044                 x86_call_reg (code, X86_EAX);
1045         } else {
1046                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1047                 x86_push_imm (code, method);
1048                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1049                 x86_call_code (code, 0);
1050         }
1051         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1052
1053         switch (save_mode) {
1054         case SAVE_EAX_EDX:
1055                 x86_pop_reg (code, X86_EAX);
1056                 x86_pop_reg (code, X86_EDX);
1057                 break;
1058         case SAVE_EAX:
1059                 x86_pop_reg (code, X86_EAX);
1060                 break;
1061         case SAVE_FP:
1062                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1063                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1064                 break;
1065         case SAVE_NONE:
1066         default:
1067                 break;
1068         }
1069
1070         return code;
1071 }
1072
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch (condition COND, signedness SIGN) to the
 * target of INS.  If the target (a label or a basic block) already has a
 * known native offset, a direct branch is emitted; otherwise a patch info
 * entry is recorded and a branch with displacement 0 is emitted to be
 * backpatched later (8 bit form when MONO_OPT_BRANCH estimates the target
 * is near enough, 32 bit otherwise).  Uses `code', `cfg' and `cpos' from
 * the surrounding scope.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1097
/* Emit a conditional branch which throws the named system exception when
 * COND holds: the branch target is left as 0 and is backpatched through the
 * MONO_PATCH_INFO_EXC entry to the exception-throwing code.
 * NOTE(review): the trailing ';' after `while (0)' adds an empty statement
 * to every expansion; harmless in plain statement context but it would
 * break an unbraced if/else -- verify call sites before removing. */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
                x86_branch32 (code, cond, 0, signed);               \
        } while (0); 
1105
/* Compare and pop the two topmost x87 registers (fcompp), then copy the fp
 * status word into %ax (fnstsw) so the outcome can be tested with integer
 * instructions. */
#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0); 
1110
1111
1112 static guint8*
1113 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1114 {
1115         if (cfg->compile_aot) {
1116                 guint32 got_reg = X86_EAX;
1117
1118                 if (cfg->compile_aot) {          
1119                         /*
1120                          * Since the patches are generated by the back end, there is
1121                          * no way to generate a got_var at this point.
1122                          */
1123                         g_assert (cfg->got_var);
1124
1125                         if (cfg->got_var->opcode == OP_REGOFFSET)
1126                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1127                         else
1128                                 got_reg = cfg->got_var->dreg;
1129                 }
1130
1131                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1132                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1133         }
1134         else {
1135                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1136                 x86_call_code (code, 0);
1137         }
1138
1139         return code;
1140 }
1141
/* Whether INS does not read the condition flags, so a flag-clobbering
 * instruction (e.g. xor reg,reg) may safely be scheduled right before it. */
/* FIXME: Add more instructions */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1144
/*
 * peephole_pass:
 *
 *   Run simple peephole optimizations over the instruction list of BB:
 * strength reduction (iconst 0 -> xor, removal of mul by 1), forwarding of
 * just-stored values into following loads/compares/pushes, and elimination
 * of redundant moves.  Removed instructions are unlinked from the singly
 * linked ->next chain via last_ins.
 */
static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *last_ins = NULL;
	ins = bb->code;

	while (ins) {

		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we cant do it always */
			if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
				ins->opcode = CEE_XOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
			}
			break;
		case OP_MUL_IMM: 
			/* remove unnecessary multiplication with 1 */
			if (ins->inst_imm == 1) {
				if (ins->dreg != ins->sreg1) {
					ins->opcode = OP_MOVE;
				} else {
					/* NOTE(review): unlinks through last_ins with no
					 * NULL check -- presumably a MUL_IMM can never be
					 * the first instruction of a bblock; verify. */
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				}
			}
			break;
		case OP_COMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0) 
			 * --> 
			 * OP_X86_TEST_NULL (reg) 
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/* 
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
					ins->opcode = OP_COMPARE_IMM;
					ins->sreg1 = last_ins->sreg1;

					/* check if we can remove cmp reg,0 with test null */
					if (!ins->inst_imm)
						ins->opcode = OP_X86_TEST_NULL;
				}

			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI4_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
					 || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}

			/* 
			 * Note: reg1 must be different from the basereg in the second load
			 * Note: if reg1 = reg2 is equal then second load is removed
			 *
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_MOVE reg1, reg2
			 */
			/* NOTE(review): this is `} if (...)', not `} else if (...)',
			 * so this test can also run after the store-forwarding branch
			 * above rewrote ins -- looks intentional but worth confirming
			 * against other backends. */
			} if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
					   || last_ins->opcode == OP_LOAD_MEMBASE) &&
			      ins->inst_basereg != last_ins->dreg &&
			      ins->inst_basereg == last_ins->inst_basereg &&
			      ins->inst_offset == last_ins->inst_offset) {

				if (ins->dreg == last_ins->dreg) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->dreg;
				}

				//g_assert_not_reached ();

#if 0
			/* 
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg
			 * -->
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_ICONST reg, imm
			 */
			} else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
						|| last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
				   ins->inst_basereg == last_ins->inst_destbasereg &&
				   ins->inst_offset == last_ins->inst_offset) {
				//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
				ins->opcode = OP_ICONST;
				ins->inst_c0 = last_ins->inst_imm;
				g_assert_not_reached (); // check this rule
#endif
			}
			break;
		case OP_LOADU1_MEMBASE:
		case OP_LOADI1_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}
			}
			break;
		case OP_LOADU2_MEMBASE:
		case OP_LOADI2_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}
			}
			break;
		case CEE_CONV_I4:
		case CEE_CONV_U4:
		case OP_MOVE:
			/*
			 * Removes:
			 *
			 * OP_MOVE reg, reg 
			 */
			/* (when it is the first ins of the bblock there is no
			 * last_ins to unlink through, so the no-op move is left
			 * in place) */
			if (ins->dreg == ins->sreg1) {
				if (last_ins)
					last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			/* 
			 * Removes:
			 *
			 * OP_MOVE sreg, dreg 
			 * OP_MOVE dreg, sreg
			 */
			if (last_ins && last_ins->opcode == OP_MOVE &&
			    ins->sreg1 == last_ins->dreg &&
			    ins->dreg == last_ins->sreg1) {
				last_ins->next = ins->next;
				ins = ins->next;
				continue;
			}
			break;
			
		case OP_X86_PUSH_MEMBASE:
			/* push of a slot that was just stored: push the source
			 * register directly instead */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				    ins->opcode = OP_X86_PUSH;
				    ins->sreg1 = last_ins->sreg1;
			}
			break;
		}
		last_ins = ins;
		ins = ins->next;
	}
	bb->last_ins = last_ins;
}
1370
/*
 * Table mapping branch opcode indexes to x86 condition codes.  Two rows of
 * eq/ge/gt/le/lt style conditions (NOTE(review): signed vs. unsigned flavor
 * is presumably selected by the `sign' argument passed to x86_branch at the
 * use site, not by this table -- verify), followed by overflow/no-overflow
 * and carry/no-carry entries.
 */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
1377
/* Debug tracing helper: evaluates `a' only above verbose level 1.
 * Swap in the commented-out definition below to compile tracing out. */
#define DEBUG(a) if (cfg->verbose_level > 1) a
//#define DEBUG(a)
1380
1381 /*
1382  * returns the offset used by spillvar. It allocates a new
1383  * spill variable if necessary. 
1384  */
1385 static int
1386 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1387 {
1388         MonoSpillInfo **si, *info;
1389         int i = 0;
1390
1391         si = &cfg->spill_info; 
1392         
1393         while (i <= spillvar) {
1394
1395                 if (!*si) {
1396                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1397                         info->next = NULL;
1398                         cfg->stack_offset -= sizeof (gpointer);
1399                         info->offset = cfg->stack_offset;
1400                 }
1401
1402                 if (i == spillvar)
1403                         return (*si)->offset;
1404
1405                 i++;
1406                 si = &(*si)->next;
1407         }
1408
1409         g_assert_not_reached ();
1410         return 0;
1411 }
1412
1413 /*
1414  * returns the offset used by spillvar. It allocates a new
1415  * spill float variable if necessary. 
1416  * (same as mono_spillvar_offset but for float)
1417  */
1418 static int
1419 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1420 {
1421         MonoSpillInfo **si, *info;
1422         int i = 0;
1423
1424         si = &cfg->spill_info_float; 
1425         
1426         while (i <= spillvar) {
1427
1428                 if (!*si) {
1429                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1430                         info->next = NULL;
1431                         cfg->stack_offset -= sizeof (double);
1432                         info->offset = cfg->stack_offset;
1433                 }
1434
1435                 if (i == spillvar)
1436                         return (*si)->offset;
1437
1438                 i++;
1439                 si = &(*si)->next;
1440         }
1441
1442         g_assert_not_reached ();
1443         return 0;
1444 }
1445
1446 /*
1447  * Creates a store for spilled floating point items
1448  */
1449 static MonoInst*
1450 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1451 {
1452         MonoInst *store;
1453         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1454         store->sreg1 = reg;
1455         store->inst_destbasereg = X86_EBP;
1456         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1457
1458         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08x(%%sp)) (from %d)\n", spill, store->inst_offset, reg));
1459         return store;
1460 }
1461
1462 /*
1463  * Creates a load for spilled floating point items 
1464  */
1465 static MonoInst*
1466 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1467 {
1468         MonoInst *load;
1469         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1470         load->dreg = reg;
1471         load->inst_basereg = X86_EBP;
1472         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1473
1474         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08x(%%sp)) (from %d)\n", spill, load->inst_offset, reg));
1475         return load;
1476 }
1477
/* Classify hard registers for the local allocator: registers rejected by
 * X86_IS_CALLEE are "global" (they keep their value across the bblock),
 * the others are freely assignable scratch regs.  NOTE(review): see
 * mini-x86.h for the exact register set X86_IS_CALLEE accepts. */
#define is_global_ireg(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && !X86_IS_CALLEE ((r)))
#define reg_is_freeable(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && X86_IS_CALLEE ((r)))
1480
/*
 * Per-register liveness record used by the local register allocator;
 * positions are instruction indexes inside the basic block.  born_in and
 * last_use delimit the interval printed by print_regtrack.
 */
typedef struct {
	int born_in;
	int killed_in;	/* NOTE(review): presumably where the value is overwritten -- not used in this chunk */
	int last_use;
	int prev_use;	/* NOTE(review): presumably the use before last_use -- not used in this chunk */
	int flags;		/* used to track fp spill/load */
} RegTrack;
1488
1489 static const char*const * ins_spec = pentium_desc;
1490
1491 static void
1492 print_ins (int i, MonoInst *ins)
1493 {
1494         const char *spec = ins_spec [ins->opcode];
1495         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1496         if (spec [MONO_INST_DEST]) {
1497                 if (ins->dreg >= MONO_MAX_IREGS)
1498                         g_print (" R%d <-", ins->dreg);
1499                 else
1500                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1501         }
1502         if (spec [MONO_INST_SRC1]) {
1503                 if (ins->sreg1 >= MONO_MAX_IREGS)
1504                         g_print (" R%d", ins->sreg1);
1505                 else
1506                         g_print (" %s", mono_arch_regname (ins->sreg1));
1507         }
1508         if (spec [MONO_INST_SRC2]) {
1509                 if (ins->sreg2 >= MONO_MAX_IREGS)
1510                         g_print (" R%d", ins->sreg2);
1511                 else
1512                         g_print (" %s", mono_arch_regname (ins->sreg2));
1513         }
1514         if (spec [MONO_INST_CLOB])
1515                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1516         g_print ("\n");
1517 }
1518
1519 static void
1520 print_regtrack (RegTrack *t, int num)
1521 {
1522         int i;
1523         char buf [32];
1524         const char *r;
1525         
1526         for (i = 0; i < num; ++i) {
1527                 if (!t [i].born_in)
1528                         continue;
1529                 if (i >= MONO_MAX_IREGS) {
1530                         g_snprintf (buf, sizeof(buf), "R%d", i);
1531                         r = buf;
1532                 } else
1533                         r = mono_arch_regname (i);
1534                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1535         }
1536 }
1537
typedef struct InstList InstList;

/* Doubly-linked list node; mono_arch_local_regalloc uses it to build a
 * reversed list of the basic block's instructions so registers can be
 * assigned walking the block backwards. */
struct InstList {
	InstList *prev;
	InstList *next;
	MonoInst *data;
};
1545
1546 static inline InstList*
1547 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1548 {
1549         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1550         item->data = data;
1551         item->prev = NULL;
1552         item->next = list;
1553         if (list)
1554                 list->prev = item;
1555         return item;
1556 }
1557
1558 /*
1559  * Force the spilling of the variable in the symbolic register 'reg'.
1560  */
1561 static int
1562 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1563 {
1564         MonoInst *load;
1565         int i, sel, spill;
1566         
1567         sel = cfg->rs->iassign [reg];
1568         /*i = cfg->rs->isymbolic [sel];
1569         g_assert (i == reg);*/
1570         i = reg;
1571         spill = ++cfg->spill_count;
1572         cfg->rs->iassign [i] = -spill - 1;
1573         mono_regstate_free_int (cfg->rs, sel);
1574         /* we need to create a spill var and insert a load to sel after the current instruction */
1575         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1576         load->dreg = sel;
1577         load->inst_basereg = X86_EBP;
1578         load->inst_offset = mono_spillvar_offset (cfg, spill);
1579         if (item->prev) {
1580                 while (ins->next != item->prev->data)
1581                         ins = ins->next;
1582         }
1583         load->next = ins->next;
1584         ins->next = load;
1585         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1586         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1587         g_assert (i == sel);
1588
1589         return sel;
1590 }
1591
/*
 * get_register_spilling:
 *
 *   Make one of the hard registers in REGMASK available for the symbolic
 * register REG by evicting (spilling) whatever is currently assigned to it.
 * Registers referenced by the current instruction (sreg1/sreg2/dreg, or the
 * hard registers they are assigned to) are excluded from the candidates
 * first; the first remaining candidate is picked.  The evicted symbolic
 * register gets a negative spill-slot marker in iassign and a load from its
 * ebp-relative spill slot is linked in after the current instruction.
 * Returns the selected hard register.
 */
static int
get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
{
	MonoInst *load;
	int i, sel, spill;

	DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
	/* exclude the registers in the current instruction */
	if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
		if (ins->sreg1 >= MONO_MAX_IREGS)
			regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
		else
			regmask &= ~ (1 << ins->sreg1);
		DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
	}
	if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
		if (ins->sreg2 >= MONO_MAX_IREGS)
			regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
		else
			regmask &= ~ (1 << ins->sreg2);
		DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
	}
	if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
		regmask &= ~ (1 << ins->dreg);
		DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
	}

	DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
	g_assert (regmask); /* need at least a register we can free */
	sel = -1;
	/* we should track prev_use and spill the register that's farther */
	for (i = 0; i < MONO_MAX_IREGS; ++i) {
		if (regmask & (1 << i)) {
			sel = i;
			DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
			break;
		}
	}
	/* i = the symbolic register currently living in the selected hard reg */
	i = cfg->rs->isymbolic [sel];
	spill = ++cfg->spill_count;
	/* negative iassign values encode the spill slot the variable lives in */
	cfg->rs->iassign [i] = -spill - 1;
	mono_regstate_free_int (cfg->rs, sel);
	/* we need to create a spill var and insert a load to sel after the current instruction */
	MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
	load->dreg = sel;
	load->inst_basereg = X86_EBP;
	load->inst_offset = mono_spillvar_offset (cfg, spill);
	if (item->prev) {
		/* walk over any spill loads/stores already inserted after 'ins'
		 * (item->prev->data is the next instruction in forward order) */
		while (ins->next != item->prev->data)
			ins = ins->next;
	}
	load->next = ins->next;
	ins->next = load;
	DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
	/* re-reserve the freed register for the remainder of the backwards walk */
	i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
	g_assert (i == sel);
	
	return sel;
}
1651
1652 static MonoInst*
1653 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1654 {
1655         MonoInst *copy;
1656         MONO_INST_NEW (cfg, copy, OP_MOVE);
1657         copy->dreg = dest;
1658         copy->sreg1 = src;
1659         if (ins) {
1660                 copy->next = ins->next;
1661                 ins->next = copy;
1662         }
1663         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1664         return copy;
1665 }
1666
1667 static MonoInst*
1668 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1669 {
1670         MonoInst *store;
1671         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1672         store->sreg1 = reg;
1673         store->inst_destbasereg = X86_EBP;
1674         store->inst_offset = mono_spillvar_offset (cfg, spill);
1675         if (ins) {
1676                 store->next = ins->next;
1677                 ins->next = store;
1678         }
1679         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1680         return store;
1681 }
1682
1683 static void
1684 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1685 {
1686         MonoInst *prev;
1687         if (item->next) {
1688                 prev = item->next->data;
1689
1690                 while (prev->next != ins)
1691                         prev = prev->next;
1692                 to_insert->next = ins;
1693                 prev->next = to_insert;
1694         } else {
1695                 to_insert->next = ins;
1696         }
1697         /* 
1698          * needed otherwise in the next instruction we can add an ins to the 
1699          * end and that would get past this instruction.
1700          */
1701         item->data = to_insert; 
1702 }
1703
1704
/* Disabled generic allocator kept for reference; superseded by
 * mono_x86_alloc_int_reg below, which additionally honours the
 * MONO_X86_REG_* hint flags. */
#if  0
static int
alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
{
	int val = cfg->rs->iassign [sym_reg];
	if (val < 0) {
		int spill = 0;
		if (val < -1) {
			/* the register gets spilled after this inst */
			spill = -val -1;
		}
		val = mono_regstate_alloc_int (cfg->rs, allow_mask);
		if (val < 0)
			val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
		cfg->rs->iassign [sym_reg] = val;
		/* add option to store before the instruction for src registers */
		if (spill)
			create_spilled_store (cfg, spill, val, sym_reg, ins);
	}
	cfg->rs->isymbolic [val] = sym_reg;
	return val;
}
#endif
1728
/* flags used in reginfo->flags */
enum {
	MONO_X86_FP_NEEDS_LOAD_SPILL	= 1 << 0,	/* fp source must be loaded from a spill slot and spilled again (fp stack full) */
	MONO_X86_FP_NEEDS_SPILL			= 1 << 1,	/* fp destination must be stored to a spill slot (fp stack full) */
	MONO_X86_FP_NEEDS_LOAD			= 1 << 2,	/* fp source must be loaded from a spill slot */
	MONO_X86_REG_NOT_ECX			= 1 << 3,	/* shift sreg1: must not be allocated to ECX */
	MONO_X86_REG_EAX				= 1 << 4,	/* long low word: prefer EAX */
	MONO_X86_REG_EDX				= 1 << 5,	/* long high word: prefer EDX */
	MONO_X86_REG_ECX				= 1 << 6	/* shift count (sreg2): prefer ECX */
};
1739
1740 static int
1741 mono_x86_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1742 {
1743         int val;
1744         int test_mask = dest_mask;
1745
1746         if (flags & MONO_X86_REG_EAX)
1747                 test_mask &= (1 << X86_EAX);
1748         else if (flags & MONO_X86_REG_EDX)
1749                 test_mask &= (1 << X86_EDX);
1750         else if (flags & MONO_X86_REG_ECX)
1751                 test_mask &= (1 << X86_ECX);
1752         else if (flags & MONO_X86_REG_NOT_ECX)
1753                 test_mask &= ~ (1 << X86_ECX);
1754
1755         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1756         if (val >= 0 && test_mask != dest_mask)
1757                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1758
1759         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1760                 DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but exluding ECX\n", test_mask));
1761                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & (~1 << X86_ECX)));
1762         }
1763
1764         if (val < 0) {
1765                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1766                 if (val < 0)
1767                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1768         }
1769
1770         return val;
1771 }
1772
1773 static inline void
1774 assign_ireg (MonoRegState *rs, int reg, int hreg)
1775 {
1776         g_assert (reg >= MONO_MAX_IREGS);
1777         g_assert (hreg < MONO_MAX_IREGS);
1778         g_assert (! is_global_ireg (hreg));
1779
1780         rs->iassign [reg] = hreg;
1781         rs->isymbolic [hreg] = reg;
1782         rs->ifree_mask &= ~ (1 << hreg);
1783 }
1784
1785 /*#include "cprop.c"*/
1786
1787 /*
1788  * Local register allocation.
1789  * We first scan the list of instructions and we save the liveness info of
 * each register (when the register is first used, when its value is set etc.).
1791  * We also reverse the list of instructions (in the InstList list) because assigning
1792  * registers backwards allows for more tricks to be used.
1793  */
1794 void
1795 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1796 {
1797         MonoInst *ins;
1798         MonoRegState *rs = cfg->rs;
1799         int i, val, fpcount;
1800         RegTrack *reginfo, *reginfof;
1801         RegTrack *reginfo1, *reginfo2, *reginfod;
1802         InstList *tmp, *reversed = NULL;
1803         const char *spec;
1804         guint32 src1_mask, src2_mask, dest_mask;
1805         GList *fspill_list = NULL;
1806         int fspill = 0;
1807
1808         if (!bb->code)
1809                 return;
1810         rs->next_vireg = bb->max_ireg;
1811         rs->next_vfreg = bb->max_freg;
1812         mono_regstate_assign (rs);
1813         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1814         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1815         rs->ifree_mask = X86_CALLEE_REGS;
1816
1817         ins = bb->code;
1818
1819         /*if (cfg->opt & MONO_OPT_COPYPROP)
1820                 local_copy_prop (cfg, ins);*/
1821
1822         i = 1;
1823         fpcount = 0;
1824         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1825         /* forward pass on the instructions to collect register liveness info */
1826         while (ins) {
1827                 spec = ins_spec [ins->opcode];
1828                 
1829                 DEBUG (print_ins (i, ins));
1830
1831                 if (spec [MONO_INST_SRC1]) {
1832                         if (spec [MONO_INST_SRC1] == 'f') {
1833                                 GList *spill;
1834                                 reginfo1 = reginfof;
1835
1836                                 spill = g_list_first (fspill_list);
1837                                 if (spill && fpcount < MONO_MAX_FREGS) {
1838                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1839                                         fspill_list = g_list_remove (fspill_list, spill->data);
1840                                 } else
1841                                         fpcount--;
1842                         }
1843                         else
1844                                 reginfo1 = reginfo;
1845                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1846                         reginfo1 [ins->sreg1].last_use = i;
1847                         if (spec [MONO_INST_SRC1] == 'L') {
1848                                 /* The virtual register is allocated sequentially */
1849                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1850                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1851                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1852                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1853
1854                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1855                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1856                         }
1857                 } else {
1858                         ins->sreg1 = -1;
1859                 }
1860                 if (spec [MONO_INST_SRC2]) {
1861                         if (spec [MONO_INST_SRC2] == 'f') {
1862                                 GList *spill;
1863                                 reginfo2 = reginfof;
1864                                 spill = g_list_first (fspill_list);
1865                                 if (spill) {
1866                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1867                                         fspill_list = g_list_remove (fspill_list, spill->data);
1868                                         if (fpcount >= MONO_MAX_FREGS) {
1869                                                 fspill++;
1870                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1871                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1872                                         }
1873                                 } else
1874                                         fpcount--;
1875                         }
1876                         else
1877                                 reginfo2 = reginfo;
1878                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1879                         reginfo2 [ins->sreg2].last_use = i;
1880                         if (spec [MONO_INST_SRC2] == 'L') {
1881                                 /* The virtual register is allocated sequentially */
1882                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1883                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1884                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1885                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1886                         }
1887                         if (spec [MONO_INST_CLOB] == 's') {
1888                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1889                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1890                         }
1891                 } else {
1892                         ins->sreg2 = -1;
1893                 }
1894                 if (spec [MONO_INST_DEST]) {
1895                         if (spec [MONO_INST_DEST] == 'f') {
1896                                 reginfod = reginfof;
1897                                 if (fpcount >= MONO_MAX_FREGS) {
1898                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1899                                         fspill++;
1900                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1901                                         fpcount--;
1902                                 }
1903                                 fpcount++;
1904                         }
1905                         else
1906                                 reginfod = reginfo;
1907                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1908                                 reginfod [ins->dreg].killed_in = i;
1909                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1910                         reginfod [ins->dreg].last_use = i;
1911                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1912                                 reginfod [ins->dreg].born_in = i;
1913                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
1914                                 /* The virtual register is allocated sequentially */
1915                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1916                                 reginfod [ins->dreg + 1].last_use = i;
1917                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1918                                         reginfod [ins->dreg + 1].born_in = i;
1919
1920                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
1921                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
1922                         }
1923                 } else {
1924                         ins->dreg = -1;
1925                 }
1926
1927                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
1928                 ++i;
1929                 ins = ins->next;
1930         }
1931
1932         // todo: check if we have anything left on fp stack, in verify mode?
1933         fspill = 0;
1934
1935         DEBUG (print_regtrack (reginfo, rs->next_vireg));
1936         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
1937         tmp = reversed;
1938         while (tmp) {
1939                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
1940                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
1941                 --i;
1942                 ins = tmp->data;
1943                 spec = ins_spec [ins->opcode];
1944                 prev_dreg = -1;
1945                 clob_dreg = -1;
1946                 DEBUG (g_print ("processing:"));
1947                 DEBUG (print_ins (i, ins));
1948                 if (spec [MONO_INST_CLOB] == 's') {
1949                         /*
1950                          * Shift opcodes, SREG2 must be RCX
1951                          */
1952                         if (rs->ifree_mask & (1 << X86_ECX)) {
1953                                 if (ins->sreg2 < MONO_MAX_IREGS) {
1954                                         /* Argument already in hard reg, need to copy */
1955                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
1956                                         insert_before_ins (ins, tmp, copy);
1957                                 }
1958                                 else {
1959                                         DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
1960                                         assign_ireg (rs, ins->sreg2, X86_ECX);
1961                                 }
1962                         } else {
1963                                 int need_ecx_spill = TRUE;
1964                                 /* 
1965                                  * we first check if src1/dreg is already assigned a register
1966                                  * and then we force a spill of the var assigned to ECX.
1967                                  */
1968                                 /* the destination register can't be ECX */
1969                                 dest_mask &= ~ (1 << X86_ECX);
1970                                 src1_mask &= ~ (1 << X86_ECX);
1971                                 val = rs->iassign [ins->dreg];
1972                                 /* 
1973                                  * the destination register is already assigned to ECX:
1974                                  * we need to allocate another register for it and then
1975                                  * copy from this to ECX.
1976                                  */
1977                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
1978                                         int new_dest;
1979                                         new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
1980                                         g_assert (new_dest >= 0);
1981                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
1982
1983                                         rs->isymbolic [new_dest] = ins->dreg;
1984                                         rs->iassign [ins->dreg] = new_dest;
1985                                         clob_dreg = ins->dreg;
1986                                         ins->dreg = new_dest;
1987                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
1988                                         need_ecx_spill = FALSE;
1989                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
1990                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
1991                                         rs->iassign [ins->dreg] = val;
1992                                         rs->isymbolic [val] = prev_dreg;
1993                                         ins->dreg = val;*/
1994                                 }
1995                                 if (is_global_ireg (ins->sreg2)) {
1996                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
1997                                         insert_before_ins (ins, tmp, copy);
1998                                 }
1999                                 else {
2000                                         val = rs->iassign [ins->sreg2];
2001                                         if (val >= 0 && val != X86_ECX) {
2002                                                 MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
2003                                                 DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
2004                                                 move->next = ins;
2005                                                 g_assert_not_reached ();
2006                                                 /* FIXME: where is move connected to the instruction list? */
2007                                                 //tmp->prev->data->next = move;
2008                                         }
2009                                         else {
2010                                                 if (val == X86_ECX)
2011                                                 need_ecx_spill = FALSE;
2012                                         }
2013                                 }
2014                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
2015                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
2016                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
2017                                         mono_regstate_free_int (rs, X86_ECX);
2018                                 }
2019                                 if (!is_global_ireg (ins->sreg2))
2020                                         /* force-set sreg2 */
2021                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2022                         }
2023                         ins->sreg2 = X86_ECX;
2024                 } else if (spec [MONO_INST_CLOB] == 'd') {
2025                         /*
2026                          * DIVISION/REMAINER
2027                          */
2028                         int dest_reg = X86_EAX;
2029                         int clob_reg = X86_EDX;
2030                         if (spec [MONO_INST_DEST] == 'd') {
2031                                 dest_reg = X86_EDX; /* reminder */
2032                                 clob_reg = X86_EAX;
2033                         }
2034                         if (is_global_ireg (ins->dreg))
2035                                 val = ins->dreg;
2036                         else
2037                                 val = rs->iassign [ins->dreg];
2038                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
2039                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2040                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2041                                 mono_regstate_free_int (rs, dest_reg);
2042                         }
2043                         if (val < 0) {
2044                                 if (val < -1) {
2045                                         /* the register gets spilled after this inst */
2046                                         int spill = -val -1;
2047                                         dest_mask = 1 << dest_reg;
2048                                         prev_dreg = ins->dreg;
2049                                         val = mono_regstate_alloc_int (rs, dest_mask);
2050                                         if (val < 0)
2051                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
2052                                         rs->iassign [ins->dreg] = val;
2053                                         if (spill)
2054                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
2055                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2056                                         rs->isymbolic [val] = prev_dreg;
2057                                         ins->dreg = val;
2058                                 } else {
2059                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
2060                                         prev_dreg = ins->dreg;
2061                                         assign_ireg (rs, ins->dreg, dest_reg);
2062                                         ins->dreg = dest_reg;
2063                                         val = dest_reg;
2064                                 }
2065                         }
2066
2067                         //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
2068                         if (val != dest_reg) { /* force a copy */
2069                                 create_copy_ins (cfg, val, dest_reg, ins);
2070                                 if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
2071                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2072                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2073                                         mono_regstate_free_int (rs, dest_reg);
2074                                 }
2075                         }
2076                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
2077                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2078                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2079                                 mono_regstate_free_int (rs, clob_reg);
2080                         }
2081                         src1_mask = 1 << X86_EAX;
2082                         src2_mask = 1 << X86_ECX;
2083                 } else if (spec [MONO_INST_DEST] == 'l') {
2084                         int hreg;
2085                         val = rs->iassign [ins->dreg];
2086                         /* check special case when dreg have been moved from ecx (clob shift) */
2087                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2088                                 hreg = clob_dreg + 1;
2089                         else
2090                                 hreg = ins->dreg + 1;
2091
2092                         /* base prev_dreg on fixed hreg, handle clob case */
2093                         val = hreg - 1;
2094
2095                         if (val != rs->isymbolic [X86_EAX] && !(rs->ifree_mask & (1 << X86_EAX))) {
2096                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [X86_EAX]));
2097                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2098                                 mono_regstate_free_int (rs, X86_EAX);
2099                         }
2100                         if (hreg != rs->isymbolic [X86_EDX] && !(rs->ifree_mask & (1 << X86_EDX))) {
2101                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [X86_EDX]));
2102                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
2103                                 mono_regstate_free_int (rs, X86_EDX);
2104                         }
2105                 } else if (spec [MONO_INST_CLOB] == 'b') {
2106                         /*
2107                          * x86_set_reg instructions, dreg needs to be EAX..EDX
2108                          */     
2109                         dest_mask = (1 << X86_EAX) | (1 << X86_EBX) | (1 << X86_ECX) | (1 << X86_EDX);
2110                         if ((ins->dreg < MONO_MAX_IREGS) && (! (dest_mask & (1 << ins->dreg)))) {
2111                                 /* 
2112                                  * ins->dreg is already a hard reg, need to allocate another
2113                                  * suitable hard reg and make a copy.
2114                                  */
2115                                 int new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2116                                 g_assert (new_dest >= 0);
2117
2118                                 create_copy_ins (cfg, ins->dreg, new_dest, ins);
2119                                 DEBUG (g_print ("\tclob:b changing dreg R%d to %s\n", ins->dreg, mono_arch_regname (new_dest)));
2120                                 ins->dreg = new_dest;
2121
2122                                 /* The hard reg is no longer needed */
2123                                 mono_regstate_free_int (rs, new_dest);
2124                         }
2125                 }
2126
2127                 /*
2128                  * TRACK DREG
2129                  */
2130                 if (spec [MONO_INST_DEST] == 'f') {
2131                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
2132                                 GList *spill_node;
2133                                 MonoInst *store;
2134                                 spill_node = g_list_first (fspill_list);
2135                                 g_assert (spill_node);
2136
2137                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
2138                                 insert_before_ins (ins, tmp, store);
2139                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
2140                                 fspill--;
2141                         }
2142                 } else if (spec [MONO_INST_DEST] == 'L') {
2143                         int hreg;
2144                         val = rs->iassign [ins->dreg];
2145                         /* check special case when dreg have been moved from ecx (clob shift) */
2146                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2147                                 hreg = clob_dreg + 1;
2148                         else
2149                                 hreg = ins->dreg + 1;
2150
2151                         /* base prev_dreg on fixed hreg, handle clob case */
2152                         prev_dreg = hreg - 1;
2153
2154                         if (val < 0) {
2155                                 int spill = 0;
2156                                 if (val < -1) {
2157                                         /* the register gets spilled after this inst */
2158                                         spill = -val -1;
2159                                 }
2160                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2161                                 rs->iassign [ins->dreg] = val;
2162                                 if (spill)
2163                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2164                         }
2165
2166                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
2167  
2168                         rs->isymbolic [val] = hreg - 1;
2169                         ins->dreg = val;
2170                         
2171                         val = rs->iassign [hreg];
2172                         if (val < 0) {
2173                                 int spill = 0;
2174                                 if (val < -1) {
2175                                         /* the register gets spilled after this inst */
2176                                         spill = -val -1;
2177                                 }
2178                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2179                                 rs->iassign [hreg] = val;
2180                                 if (spill)
2181                                         create_spilled_store (cfg, spill, val, hreg, ins);
2182                         }
2183
2184                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
2185                         rs->isymbolic [val] = hreg;
2186                         /* save reg allocating into unused */
2187                         ins->unused = val;
2188
2189                         /* check if we can free our long reg */
2190                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2191                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
2192                                 mono_regstate_free_int (rs, val);
2193                         }
2194                 }
2195                 else if (ins->dreg >= MONO_MAX_IREGS) {
2196                         int hreg;
2197                         val = rs->iassign [ins->dreg];
2198                         if (spec [MONO_INST_DEST] == 'l') {
2199                                 /* check special case when dreg have been moved from ecx (clob shift) */
2200                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2201                                         hreg = clob_dreg + 1;
2202                                 else
2203                                         hreg = ins->dreg + 1;
2204
2205                                 /* base prev_dreg on fixed hreg, handle clob case */
2206                                 prev_dreg = hreg - 1;
2207                         } else
2208                                 prev_dreg = ins->dreg;
2209
2210                         if (val < 0) {
2211                                 int spill = 0;
2212                                 if (val < -1) {
2213                                         /* the register gets spilled after this inst */
2214                                         spill = -val -1;
2215                                 }
2216                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2217                                 rs->iassign [ins->dreg] = val;
2218                                 if (spill)
2219                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2220                         }
2221                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2222                         rs->isymbolic [val] = prev_dreg;
2223                         ins->dreg = val;
2224                         /* handle cases where lreg needs to be eax:edx */
2225                         if (spec [MONO_INST_DEST] == 'l') {
2226                                 /* check special case when dreg have been moved from ecx (clob shift) */
2227                                 int hreg = prev_dreg + 1;
2228                                 val = rs->iassign [hreg];
2229                                 if (val < 0) {
2230                                         int spill = 0;
2231                                         if (val < -1) {
2232                                                 /* the register gets spilled after this inst */
2233                                                 spill = -val -1;
2234                                         }
2235                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2236                                         rs->iassign [hreg] = val;
2237                                         if (spill)
2238                                                 create_spilled_store (cfg, spill, val, hreg, ins);
2239                                 }
2240                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
2241                                 rs->isymbolic [val] = hreg;
2242                                 if (ins->dreg == X86_EAX) {
2243                                         if (val != X86_EDX)
2244                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2245                                 } else if (ins->dreg == X86_EDX) {
2246                                         if (val == X86_EAX) {
2247                                                 /* swap */
2248                                                 g_assert_not_reached ();
2249                                         } else {
2250                                                 /* two forced copies */
2251                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2252                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2253                                         }
2254                                 } else {
2255                                         if (val == X86_EDX) {
2256                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2257                                         } else {
2258                                                 /* two forced copies */
2259                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2260                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2261                                         }
2262                                 }
2263                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2264                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
2265                                         mono_regstate_free_int (rs, val);
2266                                 }
2267                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
2268                                 /* this instruction only outputs to EAX, need to copy */
2269                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2270                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
2271                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
2272                         }
2273                 }
2274                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
2275                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
2276                         mono_regstate_free_int (rs, ins->dreg);
2277                 }
2278                 /* put src1 in EAX if it needs to be */
2279                 if (spec [MONO_INST_SRC1] == 'a') {
2280                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
2281                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
2282                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2283                                 mono_regstate_free_int (rs, X86_EAX);
2284                         }
2285                         if (ins->sreg1 < MONO_MAX_IREGS) {
2286                                 /* The argument is already in a hard reg, need to copy */
2287                                 MonoInst *copy = create_copy_ins (cfg, X86_EAX, ins->sreg1, NULL);
2288                                 insert_before_ins (ins, tmp, copy);
2289                         }
2290                         else
2291                                 /* force-set sreg1 */
2292                                 assign_ireg (rs, ins->sreg1, X86_EAX);
2293                         ins->sreg1 = X86_EAX;
2294                 }
2295
2296                 /*
2297                  * TRACK SREG1
2298                  */
2299                 if (spec [MONO_INST_SRC1] == 'f') {
2300                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
2301                                 MonoInst *load;
2302                                 MonoInst *store = NULL;
2303
2304                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2305                                         GList *spill_node;
2306                                         spill_node = g_list_first (fspill_list);
2307                                         g_assert (spill_node);
2308
2309                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
2310                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2311                                 }
2312
2313                                 fspill++;
2314                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2315                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
2316                                 insert_before_ins (ins, tmp, load);
2317                                 if (store) 
2318                                         insert_before_ins (load, tmp, store);
2319                         }
2320                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
2321                         /* force source to be same as dest */
2322                         assign_ireg (rs, ins->sreg1, ins->dreg);
2323                         assign_ireg (rs, ins->sreg1 + 1, ins->unused);
2324
2325                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
2326                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
2327
2328                         ins->sreg1 = ins->dreg;
2329                         /* 
2330                          * No need for saving the reg, we know that src1=dest in this cases
2331                          * ins->inst_c0 = ins->unused;
2332                          */
2333                 }
2334                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
2335                         val = rs->iassign [ins->sreg1];
2336                         prev_sreg1 = ins->sreg1;
2337                         if (val < 0) {
2338                                 int spill = 0;
2339                                 if (val < -1) {
2340                                         /* the register gets spilled after this inst */
2341                                         spill = -val -1;
2342                                 }
2343                                 if (0 && ins->opcode == OP_MOVE) {
2344                                         /* 
2345                                          * small optimization: the dest register is already allocated
2346                                          * but the src one is not: we can simply assign the same register
2347                                          * here and peephole will get rid of the instruction later.
2348                                          * This optimization may interfere with the clobbering handling:
2349                                          * it removes a mov operation that will be added again to handle clobbering.
2350                                          * There are also some other issues that should with make testjit.
                                         * There are also some other issues that show up with make testjit.
2352                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2353                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2354                                         //g_assert (val >= 0);
2355                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2356                                 } else {
2357                                         //g_assert (val == -1); /* source cannot be spilled */
2358                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2359                                         rs->iassign [ins->sreg1] = val;
2360                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2361                                 }
2362                                 if (spill) {
2363                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2364                                         insert_before_ins (ins, tmp, store);
2365                                 }
2366                         }
2367                         rs->isymbolic [val] = prev_sreg1;
2368                         ins->sreg1 = val;
2369                 } else {
2370                         prev_sreg1 = -1;
2371                 }
2372                 /* handle clobbering of sreg1 */
2373                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2374                         MonoInst *sreg2_copy = NULL;
2375                         MonoInst *copy = NULL;
2376
2377                         if (ins->dreg == ins->sreg2) {
2378                                 /* 
2379                                  * copying sreg1 to dreg could clobber sreg2, so allocate a new
2380                                  * register for it.
2381                                  */
2382                                 int reg2 = 0;
2383
2384                                 reg2 = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->sreg2, 0);
2385
2386                                 DEBUG (g_print ("\tneed to copy sreg2 %s to reg %s\n", mono_arch_regname (ins->sreg2), mono_arch_regname (reg2)));
2387                                 sreg2_copy = create_copy_ins (cfg, reg2, ins->sreg2, NULL);
2388                                 prev_sreg2 = ins->sreg2 = reg2;
2389
2390                                 mono_regstate_free_int (rs, reg2);
2391                         }
2392
2393                         copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2394                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2395                         insert_before_ins (ins, tmp, copy);
2396
2397                         if (sreg2_copy)
2398                                 insert_before_ins (copy, tmp, sreg2_copy);
2399
2400                         /*
2401                          * Need to prevent sreg2 to be allocated to sreg1, since that
2402                          * would screw up the previous copy.
2403                          */
2404                         src2_mask &= ~ (1 << ins->sreg1);
2405                         /* we set sreg1 to dest as well */
2406                         prev_sreg1 = ins->sreg1 = ins->dreg;
2407                         src2_mask &= ~ (1 << ins->dreg);
2408                 }
2409
2410                 /*
2411                  * TRACK SREG2
2412                  */
2413                 if (spec [MONO_INST_SRC2] == 'f') {
2414                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2415                                 MonoInst *load;
2416                                 MonoInst *store = NULL;
2417
2418                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2419                                         GList *spill_node;
2420
2421                                         spill_node = g_list_first (fspill_list);
2422                                         g_assert (spill_node);
2423                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2424                                                 spill_node = g_list_next (spill_node);
2425         
2426                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2427                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2428                                 } 
2429                                 
2430                                 fspill++;
2431                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2432                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2433                                 insert_before_ins (ins, tmp, load);
2434                                 if (store) 
2435                                         insert_before_ins (load, tmp, store);
2436                         }
2437                 } 
2438                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2439                         val = rs->iassign [ins->sreg2];
2440                         prev_sreg2 = ins->sreg2;
2441                         if (val < 0) {
2442                                 int spill = 0;
2443                                 if (val < -1) {
2444                                         /* the register gets spilled after this inst */
2445                                         spill = -val -1;
2446                                 }
2447                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2448                                 rs->iassign [ins->sreg2] = val;
2449                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2450                                 if (spill)
2451                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2452                         }
2453                         rs->isymbolic [val] = prev_sreg2;
2454                         ins->sreg2 = val;
2455                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
2456                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
2457                         }
2458                 } else {
2459                         prev_sreg2 = -1;
2460                 }
2461
2462                 if (spec [MONO_INST_CLOB] == 'c') {
2463                         int j, s;
2464                         guint32 clob_mask = X86_CALLEE_REGS;
2465                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2466                                 s = 1 << j;
2467                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2468                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2469                                 }
2470                         }
2471                 }
2472                 if (spec [MONO_INST_CLOB] == 'a') {
2473                         guint32 clob_reg = X86_EAX;
2474                         if (!(rs->ifree_mask & (1 << clob_reg)) && (rs->isymbolic [clob_reg] >= 8)) {
2475                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2476                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2477                                 mono_regstate_free_int (rs, clob_reg);
2478                         }
2479                 }
2480                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2481                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2482                         mono_regstate_free_int (rs, ins->sreg1);
2483                 }
2484                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2485                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2486                         mono_regstate_free_int (rs, ins->sreg2);
2487                 }*/
2488         
2489                 //DEBUG (print_ins (i, ins));
2490                 /* this may result from a insert_before call */
2491                 if (!tmp->next)
2492                         bb->code = tmp->data;
2493                 tmp = tmp->next;
2494         }
2495
2496         g_free (reginfo);
2497         g_free (reginfof);
2498         g_list_free (fspill_list);
2499 }
2500
/*
 * emit_float_to_int:
 *
 *   Emit native code which converts the value on top of the x87 FP stack
 * to an integer of SIZE bytes (1, 2, 4 or 8), leaving the result in DREG.
 * The FPU control word is saved, the rounding mode is switched to
 * truncation (round-toward-zero, as CIL conv.* requires), and the original
 * control word is restored afterwards. Returns the updated code pointer.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
        /* Reserve a 4 byte scratch slot and save the current FPU control word */
        x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
        x86_fnstcw_membase(code, X86_ESP, 0);
        /* Set the rounding-control bits (10-11) to 11b == truncate and reload */
        x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
        x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
        x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
        x86_fldcw_membase (code, X86_ESP, 2);
        if (size == 8) {
                /* Store the 64 bit result on the stack; only the low 32 bits
                 * are popped into DREG.
                 * NOTE(review): the high dword is left on the stack until the
                 * FIXME below is resolved, so ESP is unbalanced on this path
                 * and the fldcw/add below operate on shifted offsets — confirm
                 * this path is currently unreachable before relying on it. */
                x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
                x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
                x86_pop_reg (code, dreg);
                /* FIXME: need the high register 
                 * x86_pop_reg (code, dreg_high);
                 */
        } else {
                x86_push_reg (code, X86_EAX); // SP = SP - 4
                x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
                x86_pop_reg (code, dreg);
        }
        /* Restore the saved FPU control word and release the scratch slot */
        x86_fldcw_membase (code, X86_ESP, 0);
        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

        /* Narrow the 32 bit result to the requested width, with the right
         * sign/zero extension */
        if (size == 1)
                x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
        else if (size == 2)
                x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
        return code;
}
2531
/*
 * mono_emit_stack_alloc:
 *
 *   Emit native code for a localloc: subtract TREE->sreg1 bytes from ESP
 * and, when the MONO_INST_INIT flag is set, zero the newly allocated area.
 * On Windows the allocation is performed one page at a time so the OS
 * guard-page mechanism can commit stack pages. Returns the updated code
 * pointer.
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
        int sreg = tree->sreg1;
#ifdef PLATFORM_WIN32
        guint8* br[5];

        /*
         * Under Windows:
         * If requested stack size is larger than one page,
         * perform stack-touch operation
         */
        /*
         * Generate stack probe code.
         * Under Windows, it is necessary to allocate one page at a time,
         * "touching" stack after each successful sub-allocation. This is
         * because of the way stack growth is implemented - there is a
         * guard page before the lowest stack page that is currently commited.
         * Stack normally grows sequentially so OS traps access to the
         * guard page and commits more pages when needed.
         */
        /* Requests of at most one page take the fast path below */
        x86_test_reg_imm (code, sreg, ~0xFFF);
        br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

        br[2] = code; /* loop */
        /* Allocate one page, touch it, and loop while >= one page remains */
        x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
        x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
        x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
        x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
        br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
        x86_patch (br[3], br[2]);
        /* Allocate the sub-page remainder, if any */
        x86_test_reg_reg (code, sreg, sreg);
        br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
        x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

        br[1] = code; x86_jump8 (code, 0);

        /* Fast path: the whole request fits in one page */
        x86_patch (br[0], code);
        x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
        x86_patch (br[1], code);
        x86_patch (br[4], code);
#else /* PLATFORM_WIN32 */
        x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
#endif
        if (tree->flags & MONO_INST_INIT) {
                /* Zero the area with rep stosl, which clobbers EAX, ECX and
                 * EDI: save whichever of them is neither the size source nor
                 * the destination register, tracking how much we pushed. */
                int offset = 0;
                if (tree->dreg != X86_EAX && sreg != X86_EAX) {
                        x86_push_reg (code, X86_EAX);
                        offset += 4;
                }
                if (tree->dreg != X86_ECX && sreg != X86_ECX) {
                        x86_push_reg (code, X86_ECX);
                        offset += 4;
                }
                if (tree->dreg != X86_EDI && sreg != X86_EDI) {
                        x86_push_reg (code, X86_EDI);
                        offset += 4;
                }
                
                /* ECX = size in dwords.
                 * NOTE(review): this assumes the requested size is a multiple
                 * of 4 — confirm against the localloc lowering. */
                x86_shift_reg_imm (code, X86_SHR, sreg, 2);
                if (sreg != X86_ECX)
                        x86_mov_reg_reg (code, X86_ECX, sreg, 4);
                x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
                                
                /* EDI = start of the allocated area, just above the saved regs */
                x86_lea_membase (code, X86_EDI, X86_ESP, offset);
                x86_cld (code);
                x86_prefix (code, X86_REP_PREFIX);
                x86_stosl (code);
                
                /* Restore the saved registers in reverse push order */
                if (tree->dreg != X86_EDI && sreg != X86_EDI)
                        x86_pop_reg (code, X86_EDI);
                if (tree->dreg != X86_ECX && sreg != X86_ECX)
                        x86_pop_reg (code, X86_ECX);
                if (tree->dreg != X86_EAX && sreg != X86_EAX)
                        x86_pop_reg (code, X86_EAX);
        }
        return code;
}
2610
2611
2612 static guint8*
2613 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2614 {
2615         CallInfo *cinfo;
2616         int quad;
2617
2618         /* Move return value to the target register */
2619         switch (ins->opcode) {
2620         case CEE_CALL:
2621         case OP_CALL_REG:
2622         case OP_CALL_MEMBASE:
2623                 if (ins->dreg != X86_EAX)
2624                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2625                 break;
2626         case OP_VCALL:
2627         case OP_VCALL_REG:
2628         case OP_VCALL_MEMBASE:
2629                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
2630                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2631                         /* Pop the destination address from the stack */
2632                         x86_pop_reg (code, X86_ECX);
2633                         
2634                         for (quad = 0; quad < 2; quad ++) {
2635                                 switch (cinfo->ret.pair_storage [quad]) {
2636                                 case ArgInIReg:
2637                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
2638                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
2639                                         break;
2640                                 case ArgNone:
2641                                         break;
2642                                 default:
2643                                         g_assert_not_reached ();
2644                                 }
2645                         }
2646                 }
2647                 g_free (cinfo);
2648         default:
2649                 break;
2650         }
2651
2652         return code;
2653 }
2654
/*
 * REAL_PRINT_REG:
 *
 *   Debugging helper macro: emits code which calls printf with TEXT, the
 * number of hard register REG, and REG's runtime value. EAX, EDX and ECX
 * are saved and restored around the call because they are caller-saved in
 * the x86 ABI; the three printf arguments are removed with a single
 * ADD ESP, 3*4. TEXT should be a string literal (it is concatenated with
 * the " %d %p\n" format suffix).
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
2669
/* Byte alignment for loop-header basic blocks; benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
/* A block gets loop alignment if it starts a loop body and is nested in a loop */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2673
2674 void
2675 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2676 {
2677         MonoInst *ins;
2678         MonoCallInst *call;
2679         guint offset;
2680         guint8 *code = cfg->native_code + cfg->code_len;
2681         MonoInst *last_ins = NULL;
2682         guint last_offset = 0;
2683         int max_len, cpos;
2684
2685         if (cfg->opt & MONO_OPT_PEEPHOLE)
2686                 peephole_pass (cfg, bb);
2687
2688         if (cfg->opt & MONO_OPT_LOOP) {
2689                 int pad, align = LOOP_ALIGNMENT;
2690                 /* set alignment depending on cpu */
2691                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2692                         pad = align - pad;
2693                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2694                         x86_padding (code, pad);
2695                         cfg->code_len += pad;
2696                         bb->native_offset = cfg->code_len;
2697                 }
2698         }
2699
2700         if (cfg->verbose_level > 2)
2701                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2702
2703         cpos = bb->max_offset;
2704
2705         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2706                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2707                 g_assert (!cfg->compile_aot);
2708                 cpos += 6;
2709
2710                 cov->data [bb->dfn].cil_code = bb->cil_code;
                /* this is not thread safe, but good enough */
2712                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2713         }
2714
2715         offset = code - cfg->native_code;
2716
2717         ins = bb->code;
2718         while (ins) {
2719                 offset = code - cfg->native_code;
2720
2721                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2722
2723                 if (offset > (cfg->code_size - max_len - 16)) {
2724                         cfg->code_size *= 2;
2725                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2726                         code = cfg->native_code + offset;
2727                         mono_jit_stats.code_reallocs++;
2728                 }
2729
2730                 mono_debug_record_line_number (cfg, ins, offset);
2731
2732                 switch (ins->opcode) {
2733                 case OP_BIGMUL:
2734                         x86_mul_reg (code, ins->sreg2, TRUE);
2735                         break;
2736                 case OP_BIGMUL_UN:
2737                         x86_mul_reg (code, ins->sreg2, FALSE);
2738                         break;
2739                 case OP_X86_SETEQ_MEMBASE:
2740                 case OP_X86_SETNE_MEMBASE:
2741                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2742                                          ins->inst_basereg, ins->inst_offset, TRUE);
2743                         break;
2744                 case OP_STOREI1_MEMBASE_IMM:
2745                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2746                         break;
2747                 case OP_STOREI2_MEMBASE_IMM:
2748                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2749                         break;
2750                 case OP_STORE_MEMBASE_IMM:
2751                 case OP_STOREI4_MEMBASE_IMM:
2752                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2753                         break;
2754                 case OP_STOREI1_MEMBASE_REG:
2755                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2756                         break;
2757                 case OP_STOREI2_MEMBASE_REG:
2758                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2759                         break;
2760                 case OP_STORE_MEMBASE_REG:
2761                 case OP_STOREI4_MEMBASE_REG:
2762                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2763                         break;
2764                 case CEE_LDIND_I:
2765                 case CEE_LDIND_I4:
2766                 case CEE_LDIND_U4:
2767                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2768                         break;
2769                 case OP_LOADU4_MEM:
2770                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2771                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2772                         break;
2773                 case OP_LOAD_MEMBASE:
2774                 case OP_LOADI4_MEMBASE:
2775                 case OP_LOADU4_MEMBASE:
2776                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2777                         break;
2778                 case OP_LOADU1_MEMBASE:
2779                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2780                         break;
2781                 case OP_LOADI1_MEMBASE:
2782                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2783                         break;
2784                 case OP_LOADU2_MEMBASE:
2785                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2786                         break;
2787                 case OP_LOADI2_MEMBASE:
2788                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2789                         break;
2790                 case CEE_CONV_I1:
2791                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2792                         break;
2793                 case CEE_CONV_I2:
2794                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2795                         break;
2796                 case CEE_CONV_U1:
2797                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2798                         break;
2799                 case CEE_CONV_U2:
2800                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2801                         break;
2802                 case OP_COMPARE:
2803                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2804                         break;
2805                 case OP_COMPARE_IMM:
2806                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2807                         break;
2808                 case OP_X86_COMPARE_MEMBASE_REG:
2809                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2810                         break;
2811                 case OP_X86_COMPARE_MEMBASE_IMM:
2812                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2813                         break;
2814                 case OP_X86_COMPARE_MEMBASE8_IMM:
2815                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2816                         break;
2817                 case OP_X86_COMPARE_REG_MEMBASE:
2818                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2819                         break;
2820                 case OP_X86_COMPARE_MEM_IMM:
2821                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2822                         break;
2823                 case OP_X86_TEST_NULL:
2824                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2825                         break;
2826                 case OP_X86_ADD_MEMBASE_IMM:
2827                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2828                         break;
2829                 case OP_X86_ADD_MEMBASE:
2830                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2831                         break;
2832                 case OP_X86_SUB_MEMBASE_IMM:
2833                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2834                         break;
2835                 case OP_X86_SUB_MEMBASE:
2836                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2837                         break;
2838                 case OP_X86_INC_MEMBASE:
2839                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2840                         break;
2841                 case OP_X86_INC_REG:
2842                         x86_inc_reg (code, ins->dreg);
2843                         break;
2844                 case OP_X86_DEC_MEMBASE:
2845                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2846                         break;
2847                 case OP_X86_DEC_REG:
2848                         x86_dec_reg (code, ins->dreg);
2849                         break;
2850                 case OP_X86_MUL_MEMBASE:
2851                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2852                         break;
2853                 case CEE_BREAK:
2854                         x86_breakpoint (code);
2855                         break;
2856                 case OP_ADDCC:
2857                 case CEE_ADD:
2858                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2859                         break;
2860                 case OP_ADC:
2861                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2862                         break;
2863                 case OP_ADDCC_IMM:
2864                 case OP_ADD_IMM:
2865                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2866                         break;
2867                 case OP_ADC_IMM:
2868                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2869                         break;
2870                 case OP_SUBCC:
2871                 case CEE_SUB:
2872                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2873                         break;
2874                 case OP_SBB:
2875                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2876                         break;
2877                 case OP_SUBCC_IMM:
2878                 case OP_SUB_IMM:
2879                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2880                         break;
2881                 case OP_SBB_IMM:
2882                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2883                         break;
2884                 case CEE_AND:
2885                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2886                         break;
2887                 case OP_AND_IMM:
2888                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2889                         break;
2890                 case CEE_DIV:
2891                         x86_cdq (code);
2892                         x86_div_reg (code, ins->sreg2, TRUE);
2893                         break;
2894                 case CEE_DIV_UN:
2895                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2896                         x86_div_reg (code, ins->sreg2, FALSE);
2897                         break;
2898                 case OP_DIV_IMM:
2899                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2900                         x86_cdq (code);
2901                         x86_div_reg (code, ins->sreg2, TRUE);
2902                         break;
2903                 case CEE_REM:
2904                         x86_cdq (code);
2905                         x86_div_reg (code, ins->sreg2, TRUE);
2906                         break;
2907                 case CEE_REM_UN:
2908                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2909                         x86_div_reg (code, ins->sreg2, FALSE);
2910                         break;
2911                 case OP_REM_IMM:
2912                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2913                         x86_cdq (code);
2914                         x86_div_reg (code, ins->sreg2, TRUE);
2915                         break;
2916                 case CEE_OR:
2917                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2918                         break;
2919                 case OP_OR_IMM:
2920                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2921                         break;
2922                 case CEE_XOR:
2923                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2924                         break;
2925                 case OP_XOR_IMM:
2926                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2927                         break;
2928                 case CEE_SHL:
2929                         g_assert (ins->sreg2 == X86_ECX);
2930                         x86_shift_reg (code, X86_SHL, ins->dreg);
2931                         break;
2932                 case CEE_SHR:
2933                         g_assert (ins->sreg2 == X86_ECX);
2934                         x86_shift_reg (code, X86_SAR, ins->dreg);
2935                         break;
2936                 case OP_SHR_IMM:
2937                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2938                         break;
2939                 case OP_SHR_UN_IMM:
2940                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2941                         break;
2942                 case CEE_SHR_UN:
2943                         g_assert (ins->sreg2 == X86_ECX);
2944                         x86_shift_reg (code, X86_SHR, ins->dreg);
2945                         break;
2946                 case OP_SHL_IMM:
2947                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2948                         break;
2949                 case OP_LSHL: {
2950                         guint8 *jump_to_end;
2951
2952                         /* handle shifts below 32 bits */
2953                         x86_shld_reg (code, ins->unused, ins->sreg1);
2954                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2955
2956                         x86_test_reg_imm (code, X86_ECX, 32);
2957                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2958
2959                         /* handle shift over 32 bit */
2960                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2961                         x86_clear_reg (code, ins->sreg1);
2962                         
2963                         x86_patch (jump_to_end, code);
2964                         }
2965                         break;
2966                 case OP_LSHR: {
2967                         guint8 *jump_to_end;
2968
2969                         /* handle shifts below 32 bits */
2970                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2971                         x86_shift_reg (code, X86_SAR, ins->unused);
2972
2973                         x86_test_reg_imm (code, X86_ECX, 32);
2974                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2975
2976                         /* handle shifts over 31 bits */
2977                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2978                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
2979                         
2980                         x86_patch (jump_to_end, code);
2981                         }
2982                         break;
2983                 case OP_LSHR_UN: {
2984                         guint8 *jump_to_end;
2985
2986                         /* handle shifts below 32 bits */
2987                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2988                         x86_shift_reg (code, X86_SHR, ins->unused);
2989
2990                         x86_test_reg_imm (code, X86_ECX, 32);
2991                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2992
2993                         /* handle shifts over 31 bits */
2994                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2995                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
2996                         
2997                         x86_patch (jump_to_end, code);
2998                         }
2999                         break;
3000                 case OP_LSHL_IMM:
3001                         if (ins->inst_imm >= 32) {
3002                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3003                                 x86_clear_reg (code, ins->sreg1);
3004                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
3005                         } else {
3006                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
3007                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
3008                         }
3009                         break;
3010                 case OP_LSHR_IMM:
3011                         if (ins->inst_imm >= 32) {
3012                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
3013                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
3014                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
3015                         } else {
3016                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3017                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
3018                         }
3019                         break;
3020                 case OP_LSHR_UN_IMM:
3021                         if (ins->inst_imm >= 32) {
3022                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3023                                 x86_clear_reg (code, ins->unused);
3024                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
3025                         } else {
3026                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3027                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
3028                         }
3029                         break;
3030                 case CEE_NOT:
3031                         x86_not_reg (code, ins->sreg1);
3032                         break;
3033                 case CEE_NEG:
3034                         x86_neg_reg (code, ins->sreg1);
3035                         break;
3036                 case OP_SEXT_I1:
3037                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3038                         break;
3039                 case OP_SEXT_I2:
3040                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3041                         break;
3042                 case CEE_MUL:
3043                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3044                         break;
3045                 case OP_MUL_IMM:
3046                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
3047                         break;
3048                 case CEE_MUL_OVF:
3049                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3050                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3051                         break;
3052                 case CEE_MUL_OVF_UN: {
3053                         /* the mul operation and the exception check should most likely be split */
3054                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3055                         /*g_assert (ins->sreg2 == X86_EAX);
3056                         g_assert (ins->dreg == X86_EAX);*/
3057                         if (ins->sreg2 == X86_EAX) {
3058                                 non_eax_reg = ins->sreg1;
3059                         } else if (ins->sreg1 == X86_EAX) {
3060                                 non_eax_reg = ins->sreg2;
3061                         } else {
3062                                 /* no need to save since we're going to store to it anyway */
3063                                 if (ins->dreg != X86_EAX) {
3064                                         saved_eax = TRUE;
3065                                         x86_push_reg (code, X86_EAX);
3066                                 }
3067                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
3068                                 non_eax_reg = ins->sreg2;
3069                         }
3070                         if (ins->dreg == X86_EDX) {
3071                                 if (!saved_eax) {
3072                                         saved_eax = TRUE;
3073                                         x86_push_reg (code, X86_EAX);
3074                                 }
3075                         } else if (ins->dreg != X86_EAX) {
3076                                 saved_edx = TRUE;
3077                                 x86_push_reg (code, X86_EDX);
3078                         }
3079                         x86_mul_reg (code, non_eax_reg, FALSE);
3080                         /* save before the check since pop and mov don't change the flags */
3081                         if (ins->dreg != X86_EAX)
3082                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3083                         if (saved_edx)
3084                                 x86_pop_reg (code, X86_EDX);
3085                         if (saved_eax)
3086                                 x86_pop_reg (code, X86_EAX);
3087                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3088                         break;
3089                 }
3090                 case OP_ICONST:
3091                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
3092                         break;
3093                 case OP_AOTCONST:
3094                         g_assert_not_reached ();
3095                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
3096                         x86_mov_reg_imm (code, ins->dreg, 0);
3097                         break;
3098                 case OP_LOAD_GOTADDR:
3099                         x86_call_imm (code, 0);
3100                         /* 
3101                          * The patch needs to point to the pop, since the GOT offset needs 
3102                          * to be added to that address.
3103                          */
3104                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3105                         x86_pop_reg (code, ins->dreg);
3106                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
3107                         break;
3108                 case OP_GOT_ENTRY:
3109                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3110                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
3111                         break;
3112                 case OP_X86_PUSH_GOT_ENTRY:
3113                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3114                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
3115                         break;
3116                 case CEE_CONV_I4:
3117                 case OP_MOVE:
3118                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3119                         break;
3120                 case CEE_CONV_U4:
3121                         g_assert_not_reached ();
3122                 case CEE_JMP: {
3123                         /*
3124                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3125                          * Keep in sync with the code in emit_epilog.
3126                          */
3127                         int pos = 0;
3128
3129                         /* FIXME: no tracing support... */
3130                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3131                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3132                         /* reset offset to make max_len work */
3133                         offset = code - cfg->native_code;
3134
3135                         g_assert (!cfg->method->save_lmf);
3136
3137                         if (cfg->used_int_regs & (1 << X86_EBX))
3138                                 pos -= 4;
3139                         if (cfg->used_int_regs & (1 << X86_EDI))
3140                                 pos -= 4;
3141                         if (cfg->used_int_regs & (1 << X86_ESI))
3142                                 pos -= 4;
3143                         if (pos)
3144                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3145         
3146                         if (cfg->used_int_regs & (1 << X86_ESI))
3147                                 x86_pop_reg (code, X86_ESI);
3148                         if (cfg->used_int_regs & (1 << X86_EDI))
3149                                 x86_pop_reg (code, X86_EDI);
3150                         if (cfg->used_int_regs & (1 << X86_EBX))
3151                                 x86_pop_reg (code, X86_EBX);
3152         
3153                         /* restore ESP/EBP */
3154                         x86_leave (code);
3155                         offset = code - cfg->native_code;
3156                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3157                         x86_jump32 (code, 0);
3158                         break;
3159                 }
3160                 case OP_CHECK_THIS:
3161                         /* ensure ins->sreg1 is not NULL
3162                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
3163                          * cmp DWORD PTR [eax], 0
3164                          */
3165                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
3166                         break;
3167                 case OP_ARGLIST: {
3168                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
3169                         x86_push_reg (code, hreg);
3170                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
3171                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
3172                         x86_pop_reg (code, hreg);
3173                         break;
3174                 }
3175                 case OP_FCALL:
3176                 case OP_LCALL:
3177                 case OP_VCALL:
3178                 case OP_VOIDCALL:
3179                 case CEE_CALL:
3180                         call = (MonoCallInst*)ins;
3181                         if (ins->flags & MONO_INST_HAS_METHOD)
3182                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
3183                         else
3184                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
3185                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention)) {
3186                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
3187                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
3188                                  * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
3189                                  * smart enough to do that optimization yet
3190                                  *
3191                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
                                 * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
                                 * speedup (most likely from locality benefits). People with other processors should
3194                                  * check on theirs to see what happens.
3195                                  */
3196                                 if (call->stack_usage == 4) {
3197                                         /* we want to use registers that won't get used soon, so use
3198                                          * ecx, as eax will get allocated first. edx is used by long calls,
3199                                          * so we can't use that.
3200                                          */
3201                                         
3202                                         x86_pop_reg (code, X86_ECX);
3203                                 } else {
3204                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3205                                 }
3206                         }
3207                         code = emit_move_return_value (cfg, ins, code);
3208                         break;
3209                 case OP_FCALL_REG:
3210                 case OP_LCALL_REG:
3211                 case OP_VCALL_REG:
3212                 case OP_VOIDCALL_REG:
3213                 case OP_CALL_REG:
3214                         call = (MonoCallInst*)ins;
3215                         x86_call_reg (code, ins->sreg1);
3216                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention)) {
3217                                 if (call->stack_usage == 4)
3218                                         x86_pop_reg (code, X86_ECX);
3219                                 else
3220                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3221                         }
3222                         code = emit_move_return_value (cfg, ins, code);
3223                         break;
3224                 case OP_FCALL_MEMBASE:
3225                 case OP_LCALL_MEMBASE:
3226                 case OP_VCALL_MEMBASE:
3227                 case OP_VOIDCALL_MEMBASE:
3228                 case OP_CALL_MEMBASE:
3229                         call = (MonoCallInst*)ins;
3230                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
3231                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention)) {
3232                                 if (call->stack_usage == 4)
3233                                         x86_pop_reg (code, X86_ECX);
3234                                 else
3235                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
3236                         }
3237                         code = emit_move_return_value (cfg, ins, code);
3238                         break;
                case OP_OUTARG:
                case OP_X86_PUSH:
                        /* push an outgoing argument held in a register */
                        x86_push_reg (code, ins->sreg1);
                        break;
                case OP_X86_PUSH_IMM:
                        x86_push_imm (code, ins->inst_imm);
                        break;
                case OP_X86_PUSH_MEMBASE:
                        x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
                        break;
                case OP_X86_PUSH_OBJ: 
                        /* Push a block of inst_imm bytes (a valuetype argument) onto the
                         * stack by copying it with 'rep movsd'. */
                        x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
                        /* save the registers clobbered by the string copy */
                        x86_push_reg (code, X86_EDI);
                        x86_push_reg (code, X86_ESI);
                        x86_push_reg (code, X86_ECX);
                        /* ESI = source address: inst_basereg + inst_offset */
                        if (ins->inst_offset)
                                x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
                        else
                                x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
                        /* EDI = destination: the space reserved above, past the
                         * 12 bytes of saved registers */
                        x86_lea_membase (code, X86_EDI, X86_ESP, 12);
                        /* ECX = dword count; assumes inst_imm is a multiple of 4 --
                         * presumably guaranteed by the argument layout code (TODO confirm) */
                        x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
                        x86_cld (code);
                        x86_prefix (code, X86_REP_PREFIX);
                        x86_movsd (code);
                        x86_pop_reg (code, X86_ECX);
                        x86_pop_reg (code, X86_ESI);
                        x86_pop_reg (code, X86_EDI);
                        break;
                case OP_X86_LEA:
                        /* dreg = sreg1 + (sreg2 << shift) + inst_imm; the shift count
                         * is carried in ins->unused */
                        x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
                        break;
                case OP_X86_LEA_MEMBASE:
                        x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
                        break;
                case OP_X86_XCHG:
                        x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
                        break;
                case OP_LOCALLOC:
                        /* round the requested size in sreg1 up to the frame alignment
                         * so ESP stays properly aligned */
                        x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
                        x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
                        code = mono_emit_stack_alloc (code, ins);
                        /* the allocated block starts at the new stack pointer */
                        x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
                        break;
                case CEE_RET:
                        x86_ret (code);
                        break;
                case CEE_THROW: {
                        /* pass the exception object on the stack and call the runtime
                         * throw helper, resolved later through the patch info */
                        x86_push_reg (code, ins->sreg1);
                        code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                                          (gpointer)"mono_arch_throw_exception");
                        break;
                }
                case OP_RETHROW: {
                        x86_push_reg (code, ins->sreg1);
                        code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                                          (gpointer)"mono_arch_rethrow_exception");
                        break;
                }
                case OP_CALL_HANDLER: 
                        /* call into a handler basic block; the displacement is 0 here
                         * and fixed up later via the MONO_PATCH_INFO_BB entry */
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
                        x86_call_imm (code, 0);
                        break;
                case OP_LABEL:
                        /* record the native offset of this label so branches to it
                         * can be resolved */
                        ins->inst_c0 = code - cfg->native_code;
                        break;
                case CEE_BR:
                        //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
                        //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
                        //break;
                        if (ins->flags & MONO_INST_BRLABEL) {
                                /* branch target is a label instruction */
                                if (ins->inst_i0->inst_c0) {
                                        /* label already emitted: jump straight to it */
                                        x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
                                } else {
                                        /* forward branch: emit a placeholder jump, patched later;
                                         * use the short form when the estimated distance fits in 8 bits */
                                        mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
                                        if ((cfg->opt & MONO_OPT_BRANCH) &&
                                            x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
                                                x86_jump8 (code, 0);
                                        else 
                                                x86_jump32 (code, 0);
                                }
                        } else {
                                if (ins->inst_target_bb->native_offset) {
                                        /* target basic block already emitted */
                                        x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
                                } else {
                                        /* forward branch to a not-yet-emitted block: placeholder
                                         * jump resolved through the patch info */
                                        mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
                                        if ((cfg->opt & MONO_OPT_BRANCH) &&
                                            x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
                                                x86_jump8 (code, 0);
                                        else 
                                                x86_jump32 (code, 0);
                                } 
                        }
                        break;
                case OP_BR_REG:
                        /* computed branch through a register */
                        x86_jump_reg (code, ins->sreg1);
                        break;
                case OP_CEQ:
                        /* materialize a comparison result: setcc into the 8-bit form of
                         * dreg, then zero-extend to 32 bits */
                        x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
                        break;
                case OP_CLT:
                        x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
                        break;
                case OP_CLT_UN:
                        /* the FALSE argument selects the unsigned condition-code variant */
                        x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
                        break;
                case OP_CGT:
                        x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
                        break;
                case OP_CGT_UN:
                        x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
                        break;
                case OP_CNE:
                        x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
                        break;
                case OP_COND_EXC_EQ:
                case OP_COND_EXC_NE_UN:
                case OP_COND_EXC_LT:
                case OP_COND_EXC_LT_UN:
                case OP_COND_EXC_GT:
                case OP_COND_EXC_GT_UN:
                case OP_COND_EXC_GE:
                case OP_COND_EXC_GE_UN:
                case OP_COND_EXC_LE:
                case OP_COND_EXC_LE_UN:
                case OP_COND_EXC_OV:
                case OP_COND_EXC_NO:
                case OP_COND_EXC_C:
                case OP_COND_EXC_NC:
                        /* conditionally branch to code throwing the exception named by
                         * inst_p1; relies on the OP_COND_EXC_* opcodes being contiguous
                         * and in the same order as branch_cc_table. Opcodes below
                         * OP_COND_EXC_NE_UN use the signed condition variant. */
                        EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
                                                    (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
                        break;
                case CEE_BEQ:
                case CEE_BNE_UN:
                case CEE_BLT:
                case CEE_BLT_UN:
                case CEE_BGT:
                case CEE_BGT_UN:
                case CEE_BGE:
                case CEE_BGE_UN:
                case CEE_BLE:
                case CEE_BLE_UN:
                        /* integer conditional branches, again indexed into branch_cc_table;
                         * opcodes below CEE_BNE_UN are the signed comparisons */
                        EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
                        break;
3389
3390                 /* floating point opcodes */
                case OP_R8CONST: {
                        /* load a double constant onto the x87 stack */
                        double d = *(double *)ins->inst_p0;

                        /* use the compact fldz/fld1 encodings for +0.0 and 1.0; the
                         * signbit check keeps -0.0 from being loaded as +0.0 */
                        if ((d == 0.0) && (mono_signbit (d) == 0)) {
                                x86_fldz (code);
                        } else if (d == 1.0) {
                                x86_fld1 (code);
                        } else {
                                if (cfg->compile_aot) {
                                        /* AOT code cannot embed an absolute data address:
                                         * build the value on the stack and load from there */
                                        guint32 *val = (guint32*)&d;
                                        x86_push_imm (code, val [1]);
                                        x86_push_imm (code, val [0]);
                                        x86_fld_membase (code, X86_ESP, 0, TRUE);
                                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
                                }
                                else {
                                        /* JIT: fld from the constant's address, patched in later */
                                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
                                        x86_fld (code, NULL, TRUE);
                                }
                        }
                        break;
                }
                case OP_R4CONST: {
                        /* same as OP_R8CONST but for a 4-byte float constant */
                        float f = *(float *)ins->inst_p0;

                        if ((f == 0.0) && (mono_signbit (f) == 0)) {
                                x86_fldz (code);
                        } else if (f == 1.0) {
                                x86_fld1 (code);
                        } else {
                                if (cfg->compile_aot) {
                                        /* NOTE(review): punning float through guint32* breaks
                                         * strict aliasing; tolerated by the targeted compilers,
                                         * but memcpy would be the safe form */
                                        guint32 val = *(guint32*)&f;
                                        x86_push_imm (code, val);
                                        x86_fld_membase (code, X86_ESP, 0, FALSE);
                                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
                                }
                                else {
                                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
                                        x86_fld (code, NULL, FALSE);
                                }
                        }
                        break;
                }
                case OP_STORER8_MEMBASE_REG:
                        /* store ST(0) as a double and pop it */
                        x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
                        break;
                case OP_LOADR8_SPILL_MEMBASE:
                        /* reload a spilled double and swap it below the current ST(0) */
                        x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
                        x86_fxch (code, 1);
                        break;
                case OP_LOADR8_MEMBASE:
                        x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
                        break;
                case OP_STORER4_MEMBASE_REG:
                        /* store ST(0) as a float (rounds to single precision) and pop */
                        x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
                        break;
                case OP_LOADR4_MEMBASE:
                        x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
                        break;
                case CEE_CONV_R4: /* FIXME: change precision */
                case CEE_CONV_R8:
                        /* int -> fp: x87 has no register-to-fp move, so bounce the
                         * integer through the stack and fild it */
                        x86_push_reg (code, ins->sreg1);
                        x86_fild_membase (code, X86_ESP, 0, FALSE);
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
                        break;
                case OP_X86_FP_LOAD_I8:
                        /* load a 64-bit integer from memory onto the fp stack */
                        x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
                        break;
                case OP_X86_FP_LOAD_I4:
                        x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
                        break;
                case OP_FCONV_TO_I1:
                        /* fp -> integer conversions: helper takes size in bytes + signedness */
                        code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
                        break;
                case OP_FCONV_TO_U1:
                        code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
                        break;
                case OP_FCONV_TO_I2:
                        code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
                        break;
                case OP_FCONV_TO_U2:
                        code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
                        break;
                case OP_FCONV_TO_I4:
                case OP_FCONV_TO_I:
                        code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
                        break;
                case OP_FCONV_TO_I8:
                        /* fp -> int64: temporarily set the FPU rounding control to
                         * truncation (round toward zero) for fistp */
                        x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
                        /* save the current control word on the stack */
                        x86_fnstcw_membase(code, X86_ESP, 0);
                        x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
                        /* 0xc00: RC field = 11b (truncate) */
                        x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
                        x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
                        x86_fldcw_membase (code, X86_ESP, 2);
                        /* fistp the 8-byte result, then pop the low word into dreg and
                         * the high word into the register held in ins->unused
                         * (presumably the second half of the long dreg -- see lowering) */
                        x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
                        x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
                        x86_pop_reg (code, ins->dreg);
                        x86_pop_reg (code, ins->unused);
                        /* restore the saved control word */
                        x86_fldcw_membase (code, X86_ESP, 0);
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
                        break;
                case OP_LCONV_TO_R_UN: { 
                        /* unsigned int64 -> fp. fild only understands signed values, so
                         * when the top bit is set the loaded value is off by 2^64; mn is
                         * 2^64 encoded as a little-endian 80-bit extended-precision
                         * constant (exponent 0x403f, significand 0x8000000000000000)
                         * used to correct it. */
                        static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
                        guint8 *br;

                        /* load 64bit integer to FP stack (the extra 0 dword makes the
                         * scratch area big enough for the 10-byte fst80 below) */
                        x86_push_imm (code, 0);
                        x86_push_reg (code, ins->sreg2);
                        x86_push_reg (code, ins->sreg1);
                        x86_fild_membase (code, X86_ESP, 0, TRUE);
                        /* store as 80bit FP value */
                        x86_fst80_membase (code, X86_ESP, 0);
                        
                        /* test if lreg is negative, i.e. the unsigned value had its top bit set */
                        x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
                        br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
        
                        /* add correction constant mn (2^64) */
                        x86_fld80_mem (code, mn);
                        x86_fld80_membase (code, X86_ESP, 0);
                        x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        x86_fst80_membase (code, X86_ESP, 0);

                        x86_patch (br, code);

                        /* reload the (possibly corrected) value and release the
                         * 12-byte scratch area */
                        x86_fld80_membase (code, X86_ESP, 0);
                        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);

                        break;
                }
                case OP_LCONV_TO_OVF_I: {
                        guint8 *br [3], *label [1];

                        /* 
                         * Checked int64 -> int32 conversion.
                         * Valid range: 0xffffffff:0x80000000 (INT32_MIN) up to
                         * 0x00000000:0x7fffffff (INT32_MAX), i.e. the high word must be
                         * the sign extension of the low word; otherwise throw.
                         */
                        x86_test_reg_reg (code, ins->sreg1, ins->sreg1);

                        /* If the low word top bit is set, see if we are negative */
                        br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
                        /* We are not negative (no top bit set); check that the top word is zero */
                        x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
                        br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
                        label [0] = code;

                        /* throw exception */
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
                        x86_jump32 (code, 0);
        
                        x86_patch (br [0], code);
                        /* our top bit is set, check that the top word is 0xffffffff */
                        x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
                
                        /* br[1] lands here with ZF still set from the test above, so the
                         * NE branch below falls through for that path */
                        x86_patch (br [1], code);
                        /* nope, emit exception */
                        br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
                        x86_patch (br [2], label [0]);

                        /* value is in range: the low word is the result */
                        if (ins->dreg != ins->sreg1)
                                x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
                        break;
                }
                case OP_FADD:
                        /* ST(1) = ST(1) op ST(0), popping ST(0): the usual two-operand
                         * x87 pattern for the fp binops below */
                        x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        break;
                case OP_FSUB:
                        x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
                        break;
                case OP_FMUL:
                        x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
                        break;
                case OP_FDIV:
                        x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
                        break;
                case OP_FNEG:
                        x86_fchs (code);
                        break;
                case OP_SIN:
                        x86_fsin (code);
                        /* NOTE(review): the fldz/fadd pair adds +0.0 to the result;
                         * presumably to normalize the 80-bit intermediate -- confirm
                         * before removing (the same pattern follows fcos/fptan/fpatan) */
                        x86_fldz (code);
                        x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        break;
                case OP_COS:
                        x86_fcos (code);
                        x86_fldz (code);
                        x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        break;
                case OP_ABS:
                        x86_fabs (code);
                        break;
                case OP_TAN: {
                        /* 
                         * it really doesn't make sense to inline all this code,
                         * it's here just to show that things may not be as simple 
                         * as they appear.
                         */
                        guchar *check_pos, *end_tan, *pop_jump;
                        /* EAX is clobbered by fnstsw below, so preserve it */
                        x86_push_reg (code, X86_EAX);
                        x86_fptan (code);
                        x86_fnstsw (code);
                        /* C2 set => the operand was outside fptan's range (|x| >= 2^63) */
                        x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
                        check_pos = code;
                        x86_branch8 (code, X86_CC_NE, 0, FALSE);
                        x86_fstp (code, 0); /* pop the 1.0 fptan pushes on success */
                        end_tan = code;
                        x86_jump8 (code, 0);
                        /* out of range: reduce the argument modulo 2*pi (pi + pi)
                         * with fprem1 before retrying fptan */
                        x86_fldpi (code);
                        x86_fp_op (code, X86_FADD, 0);
                        x86_fxch (code, 1);
                        x86_fprem1 (code);
                        x86_fstsw (code);
                        /* re-check C2 after the reduction step */
                        x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
                        pop_jump = code;
                        x86_branch8 (code, X86_CC_NE, 0, FALSE);
                        x86_fstp (code, 1);
                        x86_fptan (code);
                        x86_patch (pop_jump, code);
                        x86_fstp (code, 0); /* pop the 1.0 */
                        x86_patch (check_pos, code);
                        x86_patch (end_tan, code);
                        x86_fldz (code);
                        x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        x86_pop_reg (code, X86_EAX);
                        break;
                }
                case OP_ATAN:
                        /* atan(x) = fpatan with 1.0 as the second operand */
                        x86_fld1 (code);
                        x86_fpatan (code);
                        x86_fldz (code);
                        x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        break;
                case OP_SQRT:
                        x86_fsqrt (code);
                        break;
                case OP_X86_FPOP:
                        /* discard ST(0) */
                        x86_fstp (code, 0);
                        break;
                case OP_FREM: {
                        guint8 *l1, *l2;

                        /* EAX is clobbered by fnstsw below */
                        x86_push_reg (code, X86_EAX);
                        /* we need to exchange ST(0) with ST(1) */
                        x86_fxch (code, 1);

                        /* this requires a loop, because fprem sometimes 
                         * returns a partial remainder (signalled via C2) */
                        l1 = code;
                        /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
                        /* x86_fprem1 (code); */
                        x86_fprem (code);
                        x86_fnstsw (code);
                        x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
                        /* loop back to l1 while C2 is set; the backward displacement is
                         * computed by hand -- l2 points just past this 2-byte short branch */
                        l2 = code + 2;
                        x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);

                        /* pop result */
                        x86_fstp (code, 1);

                        x86_pop_reg (code, X86_EAX);
                        break;
                }
                case OP_FCOMPARE:
                        if (cfg->opt & MONO_OPT_FCMOV) {
                                /* fcomip sets EFLAGS directly; pop the remaining operand */
                                x86_fcomip (code, 1);
                                x86_fstp (code, 0);
                                break;
                        }
                        /* this overwrites EAX */
                        EMIT_FPCOMPARE(code);
                        /* keep only the fp condition-code bits in EAX; the following
                         * FBcc/FCcc opcode is expected to consume them */
                        x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
                        break;
                case OP_FCEQ:
                        if (cfg->opt & MONO_OPT_FCMOV) {
                                /* zeroing the register at the start results in 
                                 * shorter and faster code (we can also remove the widening op)
                                 */
                                guchar *unordered_check;
                                x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
                                x86_fcomip (code, 1);
                                x86_fstp (code, 0);
                                /* PF set after fcomip means the operands were unordered
                                 * (NaN): leave the result at 0 */
                                unordered_check = code;
                                x86_branch8 (code, X86_CC_P, 0, FALSE);
                                x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
                                x86_patch (unordered_check, code);
                                break;
                        }
                        /* non-fcmov path clobbers EAX, so preserve it unless it is
                         * the destination anyway */
                        if (ins->dreg != X86_EAX) 
                                x86_push_reg (code, X86_EAX);

                        EMIT_FPCOMPARE(code);
                        x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
                        /* equal <=> only C3 (0x4000) set in the status word */
                        x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
                        x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

                        if (ins->dreg != X86_EAX) 
                                x86_pop_reg (code, X86_EAX);
                        break;
                case OP_FCLT:
                case OP_FCLT_UN:
                        if (cfg->opt & MONO_OPT_FCMOV) {
                                /* zeroing the register at the start results in 
                                 * shorter and faster code (we can also remove the widening op)
                                 */
                                x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
                                x86_fcomip (code, 1);
                                x86_fstp (code, 0);
                                if (ins->opcode == OP_FCLT_UN) {
                                        /* clt.un must yield 1 for unordered operands (PF set) */
                                        guchar *unordered_check = code;
                                        guchar *jump_to_end;
                                        x86_branch8 (code, X86_CC_P, 0, FALSE);
                                        x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
                                        jump_to_end = code;
                                        x86_jump8 (code, 0);
                                        x86_patch (unordered_check, code);
                                        /* dreg is known to be 0 here, so inc sets it to 1 */
                                        x86_inc_reg (code, ins->dreg);
                                        x86_patch (jump_to_end, code);
                                } else {
                                        /* operand order on the fp stack inverts the test:
                                         * GT here corresponds to sreg1 < sreg2 */
                                        x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
                                }
                                break;
                        }
                        if (ins->dreg != X86_EAX) 
                                x86_push_reg (code, X86_EAX);

                        EMIT_FPCOMPARE(code);
                        x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
                        if (ins->opcode == OP_FCLT_UN) {
                                /* .un variant: when the masked status is non-zero, also
                                 * accept the unordered pattern (all CC bits set) */
                                guchar *is_not_zero_check, *end_jump;
                                is_not_zero_check = code;
                                x86_branch8 (code, X86_CC_NZ, 0, TRUE);
                                end_jump = code;
                                x86_jump8 (code, 0);
                                x86_patch (is_not_zero_check, code);
                                x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

                                x86_patch (end_jump, code);
                        }
                        /* ZF here means EAX == 0 (less-than, given the operand order
                         * EMIT_FPCOMPARE uses) or, on the .un path, the unordered pattern */
                        x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

                        if (ins->dreg != X86_EAX) 
                                x86_pop_reg (code, X86_EAX);
                        break;
                case OP_FCGT:
                case OP_FCGT_UN:
                        if (cfg->opt & MONO_OPT_FCMOV) {
                                /* zeroing the register at the start results in 
                                 * shorter and faster code (we can also remove the widening op)
                                 */
                                guchar *unordered_check;
                                x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
                                x86_fcomip (code, 1);
                                x86_fstp (code, 0);
                                if (ins->opcode == OP_FCGT) {
                                        /* ordered variant: unordered (PF set) must yield 0 */
                                        unordered_check = code;
                                        x86_branch8 (code, X86_CC_P, 0, FALSE);
                                        /* operand order inverts the test: LT <=> sreg1 > sreg2 */
                                        x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
                                        x86_patch (unordered_check, code);
                                } else {
                                        x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
                                }
                                break;
                        }
                        if (ins->dreg != X86_EAX) 
                                x86_push_reg (code, X86_EAX);

                        EMIT_FPCOMPARE(code);
                        x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
                        /* greater-than <=> only C0 set, given the operand order
                         * EMIT_FPCOMPARE uses */
                        x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
                        if (ins->opcode == OP_FCGT_UN) {
                                /* .un variant: also accept the unordered pattern
                                 * (all CC bits set) */
                                guchar *is_not_zero_check, *end_jump;
                                is_not_zero_check = code;
                                x86_branch8 (code, X86_CC_NZ, 0, TRUE);
                                end_jump = code;
                                x86_jump8 (code, 0);
                                x86_patch (is_not_zero_check, code);
                                x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
        
                                x86_patch (end_jump, code);
                        }
                        x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
                        x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

                        if (ins->dreg != X86_EAX) 
                                x86_pop_reg (code, X86_EAX);
                        break;
                case OP_FBEQ:
                        if (cfg->opt & MONO_OPT_FCMOV) {
                                /* skip the branch when the compare was unordered (PF set) */
                                guchar *jump = code;
                                x86_branch8 (code, X86_CC_P, 0, TRUE);
                                EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
                                x86_patch (jump, code);
                                break;
                        }
                        /* non-fcmov path: EAX holds the masked fp status word left by a
                         * preceding OP_FCOMPARE; equal <=> only C3 (0x4000) set */
                        x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
                        EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
                        break;
                case OP_FBNE_UN:
                        /* Branch if C013 != 100 */
                        if (cfg->opt & MONO_OPT_FCMOV) {
                                /* branch if !ZF or (PF|CF) */
                                EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
                                EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
                                EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
                                break;
                        }
                        x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
                        EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
                        break;
                case OP_FBLT:
                        if (cfg->opt & MONO_OPT_FCMOV) {
                                /* operand order inverts the test: GT <=> sreg1 < sreg2 */
                                EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
                                break;
                        }
                        /* relies on ZF still being set by the AND at the end of
                         * OP_FCOMPARE: EAX == 0 <=> less-than */
                        EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
                        break;
                case OP_FBLT_UN:
                        if (cfg->opt & MONO_OPT_FCMOV) {
                                /* the .un variant also branches when unordered (PF set) */
                                EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
                                EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
                                break;
                        }
                        /* NOTE(review): this condition is always true here -- kept from
                         * the shared pattern used by the other FB*_UN cases */
                        if (ins->opcode == OP_FBLT_UN) {
                                guchar *is_not_zero_check, *end_jump;
                                is_not_zero_check = code;
                                x86_branch8 (code, X86_CC_NZ, 0, TRUE);
                                end_jump = code;
                                x86_jump8 (code, 0);
                                x86_patch (is_not_zero_check, code);
                                /* sets ZF when EAX == CC_MASK (the unordered pattern),
                                 * so the EQ branch below is also taken for NaN operands */
                                x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

                                x86_patch (end_jump, code);
                        }
                        EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
                        break;
3827                 case OP_FBGT:
3828                 case OP_FBGT_UN:
3829                         if (cfg->opt & MONO_OPT_FCMOV) {
3830                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3831                                 break;
3832                         }
3833                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3834                         if (ins->opcode == OP_FBGT_UN) {
3835                                 guchar *is_not_zero_check, *end_jump;
3836                                 is_not_zero_check = code;
3837                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3838                                 end_jump = code;
3839                                 x86_jump8 (code, 0);
3840                                 x86_patch (is_not_zero_check, code);
3841                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3842
3843                                 x86_patch (end_jump, code);
3844                         }
3845                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3846                         break;
3847                 case OP_FBGE:
3848                         /* Branch if C013 == 100 or 001 */
3849                         if (cfg->opt & MONO_OPT_FCMOV) {
3850                                 guchar *br1;
3851
3852                                 /* skip branch if C1=1 */
3853                                 br1 = code;
3854                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3855                                 /* branch if (C0 | C3) = 1 */
3856                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3857                                 x86_patch (br1, code);
3858                                 break;
3859                         }
3860                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3861                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3862                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3863                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3864                         break;
3865                 case OP_FBGE_UN:
3866                         /* Branch if C013 == 000 */
3867                         if (cfg->opt & MONO_OPT_FCMOV) {
3868                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3869                                 break;
3870                         }
3871                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3872                         break;
3873                 case OP_FBLE:
3874                         /* Branch if C013=000 or 100 */
3875                         if (cfg->opt & MONO_OPT_FCMOV) {
3876                                 guchar *br1;
3877
3878                                 /* skip branch if C1=1 */
3879                                 br1 = code;
3880                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3881                                 /* branch if C0=0 */
3882                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3883                                 x86_patch (br1, code);
3884                                 break;
3885                         }
3886                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3887                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3888                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3889                         break;
3890                 case OP_FBLE_UN:
3891                         /* Branch if C013 != 001 */
3892                         if (cfg->opt & MONO_OPT_FCMOV) {
3893                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3894                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3895                                 break;
3896                         }
3897                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3898                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3899                         break;
3900                 case CEE_CKFINITE: {
3901                         x86_push_reg (code, X86_EAX);
3902                         x86_fxam (code);
3903                         x86_fnstsw (code);
3904                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3905                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3906                         x86_pop_reg (code, X86_EAX);
3907                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3908                         break;
3909                 }
3910                 case OP_TLS_GET: {
3911                         x86_prefix (code, X86_GS_PREFIX);
3912                         x86_mov_reg_mem (code, ins->dreg, ins->inst_offset, 4);                 
3913                         break;
3914                 }
3915                 case OP_ATOMIC_ADD_I4: {
3916                         int dreg = ins->dreg;
3917
3918                         if (dreg == ins->inst_basereg) {
3919                                 x86_push_reg (code, ins->sreg2);
3920                                 dreg = ins->sreg2;
3921                         } 
3922                         
3923                         if (dreg != ins->sreg2)
3924                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3925
3926                         x86_prefix (code, X86_LOCK_PREFIX);
3927                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3928
3929                         if (dreg != ins->dreg) {
3930                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3931                                 x86_pop_reg (code, dreg);
3932                         }
3933
3934                         break;
3935                 }
3936                 case OP_ATOMIC_ADD_NEW_I4: {
3937                         int dreg = ins->dreg;
3938
3939                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3940                         if (ins->sreg2 == dreg) {
3941                                 if (dreg == X86_EBX) {
3942                                         dreg = X86_EDI;
3943                                         if (ins->inst_basereg == X86_EDI)
3944                                                 dreg = X86_ESI;
3945                                 } else {
3946                                         dreg = X86_EBX;
3947                                         if (ins->inst_basereg == X86_EBX)
3948                                                 dreg = X86_EDI;
3949                                 }
3950                         } else if (ins->inst_basereg == dreg) {
3951                                 if (dreg == X86_EBX) {
3952                                         dreg = X86_EDI;
3953                                         if (ins->sreg2 == X86_EDI)
3954                                                 dreg = X86_ESI;
3955                                 } else {
3956                                         dreg = X86_EBX;
3957                                         if (ins->sreg2 == X86_EBX)
3958                                                 dreg = X86_EDI;
3959                                 }
3960                         }
3961
3962                         if (dreg != ins->dreg) {
3963                                 x86_push_reg (code, dreg);
3964                         }
3965
3966                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3967                         x86_prefix (code, X86_LOCK_PREFIX);
3968                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3969                         /* dreg contains the old value, add with sreg2 value */
3970                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3971                         
3972                         if (ins->dreg != dreg) {
3973                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3974                                 x86_pop_reg (code, dreg);
3975                         }
3976
3977                         break;
3978                 }
3979                 case OP_ATOMIC_EXCHANGE_I4: {
3980                         guchar *br[2];
3981                         int sreg2 = ins->sreg2;
3982                         int breg = ins->inst_basereg;
3983
3984                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3985                          * hack to overcome limits in x86 reg allocator 
3986                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3987                          */
3988                         if (ins->dreg != X86_EAX)
3989                                 x86_push_reg (code, X86_EAX);
3990                         
3991                         /* We need the EAX reg for the cmpxchg */
3992                         if (ins->sreg2 == X86_EAX) {
3993                                 x86_push_reg (code, X86_EDX);
3994                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3995                                 sreg2 = X86_EDX;
3996                         }
3997
3998                         if (breg == X86_EAX) {
3999                                 x86_push_reg (code, X86_ESI);
4000                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
4001                                 breg = X86_ESI;
4002                         }
4003
4004                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
4005
4006                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
4007                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
4008                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
4009                         x86_patch (br [1], br [0]);
4010
4011                         if (breg != ins->inst_basereg)
4012                                 x86_pop_reg (code, X86_ESI);
4013
4014                         if (ins->dreg != X86_EAX) {
4015                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
4016                                 x86_pop_reg (code, X86_EAX);
4017                         }
4018
4019                         if (ins->sreg2 != sreg2)
4020                                 x86_pop_reg (code, X86_EDX);
4021
4022                         break;
4023                 }
4024                 default:
4025                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
4026                         g_assert_not_reached ();
4027                 }
4028
4029                 if ((code - cfg->native_code - offset) > max_len) {
4030                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4031                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4032                         g_assert_not_reached ();
4033                 }
4034                
4035                 cpos += max_len;
4036
4037                 last_ins = ins;
4038                 last_offset = offset;
4039                 
4040                 ins = ins->next;
4041         }
4042
4043         cfg->code_len = code - cfg->native_code;
4044 }
4045
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Register architecture specific low-level JIT icalls.  The x86 backend
 * has none, so this is intentionally empty.
 */
void
mono_arch_register_lowlevel_calls (void)
{
}
4050
4051 void
4052 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4053 {
4054         MonoJumpInfo *patch_info;
4055         gboolean compile_aot = !run_cctors;
4056
4057         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4058                 unsigned char *ip = patch_info->ip.i + code;
4059                 const unsigned char *target;
4060
4061                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4062
4063                 if (compile_aot) {
4064                         switch (patch_info->type) {
4065                         case MONO_PATCH_INFO_BB:
4066                         case MONO_PATCH_INFO_LABEL:
4067                                 break;
4068                         default:
4069                                 /* No need to patch these */
4070                                 continue;
4071                         }
4072                 }
4073
4074                 switch (patch_info->type) {
4075                 case MONO_PATCH_INFO_IP:
4076                         *((gconstpointer *)(ip)) = target;
4077                         break;
4078                 case MONO_PATCH_INFO_CLASS_INIT: {
4079                         guint8 *code = ip;
4080                         /* Might already been changed to a nop */
4081                         x86_call_code (code, 0);
4082                         x86_patch (ip, target);
4083                         break;
4084                 }
4085                 case MONO_PATCH_INFO_ABS:
4086                 case MONO_PATCH_INFO_METHOD:
4087                 case MONO_PATCH_INFO_METHOD_JUMP:
4088                 case MONO_PATCH_INFO_INTERNAL_METHOD:
4089                 case MONO_PATCH_INFO_BB:
4090                 case MONO_PATCH_INFO_LABEL:
4091                         x86_patch (ip, target);
4092                         break;
4093                 case MONO_PATCH_INFO_NONE:
4094                         break;
4095                 default: {
4096                         guint32 offset = mono_arch_get_patch_offset (ip);
4097                         *((gconstpointer *)(ip + offset)) = target;
4098                         break;
4099                 }
4100                 }
4101         }
4102 }
4103
/*
 * mono_arch_emit_prolog:
 *
 *   Emit the per-method prologue: set up the EBP frame, save either the
 * LMF (Last Managed Frame) or the used callee-saved registers, allocate
 * the stack frame, and load register-allocated arguments from the stack.
 * Returns the code cursor positioned after the emitted prologue.
 */
guint8 *
mono_arch_emit_prolog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoBasicBlock *bb;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int alloc_size, pos, max_offset, i;
	guint8 *code;

	/* Initial code buffer estimate; grown later by epilog/exception emitters */
	cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
	code = cfg->native_code = g_malloc (cfg->code_size);

	/* Standard EBP frame setup */
	x86_push_reg (code, X86_EBP);
	x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);

	/* stack_offset is negative, so this yields the frame size in bytes */
	alloc_size = - cfg->stack_offset;
	pos = 0;

	if (method->save_lmf) {
		/* The pushes below lay out a MonoLMF structure on the stack */
		pos += sizeof (MonoLMF);

		/* save the current IP */
		/* code + 1 skips the push opcode byte so the patch lands on the immediate */
		mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
		x86_push_imm_template (code);

		/* save all caller saved regs */
		x86_push_reg (code, X86_EBP);
		x86_push_reg (code, X86_ESI);
		x86_push_reg (code, X86_EDI);
		x86_push_reg (code, X86_EBX);

		/* save method info */
		x86_push_imm (code, method);

		/* get the address of lmf for the current thread */
		/* 
		 * This is performance critical so we try to use some tricks to make
		 * it fast.
		 */
		if (lmf_tls_offset != -1) {
			/* Load lmf quicky using the GS register */
			x86_prefix (code, X86_GS_PREFIX);
			x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
		}
		else {
			if (cfg->compile_aot) {
				/* The GOT var does not exist yet */
				/* Materialize the GOT address in EAX via a call/pop pair,
				 * then call mono_get_lmf_addr indirectly through the GOT */
				x86_call_imm (code, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
				x86_pop_reg (code, X86_EAX);
				x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
				x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
			}
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
		}

		/* push lmf */
		x86_push_reg (code, X86_EAX); 
		/* push *lfm (previous_lmf) */
		x86_push_membase (code, X86_EAX, 0);
		/* *(lmf) = ESP */
		x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
	} else {
		/* No LMF: just save the callee-saved registers this method uses */

		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_push_reg (code, X86_EBX);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_push_reg (code, X86_EDI);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_push_reg (code, X86_ESI);
			pos += 4;
		}
	}

	/* The pushes above already consumed 'pos' bytes of the frame */
	alloc_size -= pos;

	if (alloc_size) {
		/* See mono_emit_stack_alloc */
#ifdef PLATFORM_WIN32
		/* Touch the stack one page at a time so the Windows guard
		 * page is hit in order and the stack is grown correctly */
		guint32 remaining_size = alloc_size;
		while (remaining_size >= 0x1000) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
			x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
			remaining_size -= 0x1000;
		}
		if (remaining_size)
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
#else
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
#endif
	}

	/* compute max_offset in order to use short forward jumps */
	max_offset = 0;
	if (cfg->opt & MONO_OPT_BRANCH) {
		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
			MonoInst *ins = bb->code;
			bb->max_offset = max_offset;

			if (cfg->prof_options & MONO_PROFILE_COVERAGE)
				max_offset += 6;
			/* max alignment for loops */
			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
				max_offset += LOOP_ALIGNMENT;

			while (ins) {
				if (ins->opcode == OP_LABEL)
					ins->inst_c1 = max_offset;
				
				/* Accumulate the worst-case length of each instruction */
				max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
				ins = ins->next;
			}
		}
	}

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);

	/* load arguments allocated to register from the stack */
	sig = mono_method_signature (method);
	pos = 0;

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = cfg->varinfo [pos];
		if (inst->opcode == OP_REGVAR) {
			x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
			if (cfg->verbose_level > 2)
				g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
		}
		pos++;
	}

	cfg->code_len = code - cfg->native_code;

	return code;
}
4249
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the per-method epilogue: unlink/restore the LMF or restore the
 * saved callee-saved registers, tear down the EBP frame with LEAVE, and
 * emit the ret, popping caller arguments when the calling convention
 * (stdcall, or a hidden valuetype return pointer) requires it.
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;
	
	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	/* Grow the code buffer if the epilogue might not fit */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with CEE_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;

		/* Find a spare register */
		/* EAX (and EDX for 64 bit returns) carry the return value,
		 * so pick a scratch register that does not clobber it */
		switch (sig->ret->type) {
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
			prev_lmf_reg = X86_EDI;
			cfg->used_int_regs |= (1 << X86_EDI);
			break;
		default:
			prev_lmf_reg = X86_EDX;
			break;
		}

		/* NOTE(review): the fixed EBP-relative offsets below (-32, -28,
		 * -20, -16, -12) appear to index the MonoLMF laid out by the
		 * prologue pushes — confirm against the MonoLMF layout if it
		 * ever changes. */
		/* reg = previous_lmf */
		x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);

		/* ecx = lmf */
		x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);

		/* *(lmf) = previous_lmf */
		x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* Compute how far below EBP the saved registers live */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		/* Pop in reverse order of the prologue pushes */
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	x86_leave (code);

	if (CALLCONV_IS_STDCALL (sig->call_convention)) {
		/* stdcall: the callee pops its own stack arguments */
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret))
		/* pop the hidden valuetype return-buffer pointer argument */
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);

}
4363
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out-of-line exception throwing sequences at the end of the
 * method body and patch the in-method branches to jump to them.  Throw
 * sequences are shared between call sites raising the same exception
 * class (for up to 16 distinct classes).
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Redirect the in-method branch to this throw sequence */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse it: push this site's IP offset and jump to the shared code */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 got_reg = X86_EAX;
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				if (cfg->compile_aot) {
					size = 5 + 6;
					if (!cfg->got_var)
						size += 32;
					else if (cfg->got_var->opcode == OP_REGOFFSET)
						size += 6;
				}
				else
					size = 5 + 5;

				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					/* Placeholder push; rewritten below once the final IP is known */
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				if (cfg->compile_aot) {
					/*
					 * Since the patches are generated by the back end, there is
					 * no way to generate a got_var at this point.
					 */
					if (!cfg->got_var) {
						/* Materialize the GOT address in EAX via a call/pop pair */
						x86_call_imm (code, 0);
						mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
						x86_pop_reg (code, X86_EAX);
						x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
					}
					else {
						if (cfg->got_var->opcode == OP_REGOFFSET)
							x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
						else
							got_reg = cfg->got_var->dreg;
					}
				}

				x86_push_imm (code, exc_class->type_token);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				if (cfg->compile_aot)
					x86_call_membase (code, got_reg, 0xf0f0f0f0);
				else
					x86_call_code (code, 0);
				/* Rewrite the earlier push with the real IP offset, pad leftovers with nops */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
4499
4500 void
4501 mono_arch_flush_icache (guint8 *code, gint size)
4502 {
4503         /* not needed */
4504 }
4505
/*
 * mono_arch_flush_register_windows:
 *
 *   Register windows are a SPARC concept; x86 has none, so there is
 * nothing to flush here.
 */
void
mono_arch_flush_register_windows (void)
{
	/* no register windows on x86 */
}
4510
4511 /*
4512  * Support for fast access to the thread-local lmf structure using the GS
4513  * segment register on NPTL + kernel 2.6.x.
4514  */
4515
4516 static gboolean tls_offset_inited = FALSE;
4517
/* code should be simply return <tls var>; */

/* Compare LEN bytes of CODE against SIG, stopping at the first mismatch
 * (mirrors the short-circuit of the original &&-chains, so we never read
 * past the point where the prologue stops matching). Returns 1 on match. */
static int
tls_sig_match (const unsigned char *code, const unsigned char *sig, int len)
{
	int i;

	for (i = 0; i < len; ++i)
		if (code [i] != sig [i])
			return 0;
	return 1;
}

/*
 * read_tls_offset_from_method:
 *
 *   METHOD points at the compiled code of an accessor whose body is just
 * "return <__thread variable>;". Disassemble the gcc-generated prologue to
 * recover the variable's offset inside the TLS block (addressed through the
 * GS segment on NPTL). Returns -1 if the code matches none of the known
 * compiler patterns.
 */
static int read_tls_offset_from_method (void* method)
{
	const unsigned char *code = (const unsigned char *) method;
	int offset;

	/* gcc-3.3.2
	 *
	 * push ebp
	 * mov ebp, esp
	 * mov eax, gs:0
	 * mov eax, DWORD PTR [eax+<offset>]
	 */
	static const unsigned char sig_gcc332 [] = {
		0x55, 0x89, 0xe5, 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, 0x8b, 0x80
	};
	/* gcc-3.4
	 *
	 * push ebp
	 * mov ebp, esp
	 * mov eax, gs:<offset>
	 */
	static const unsigned char sig_gcc34 [] = {
		0x55, 0x89, 0xe5, 0x65, 0xa1
	};
	/* gcc-3.2.2 with -march=athlon
	 *
	 * push ebp
	 * mov eax, gs:<offset>
	 * mov ebp, esp
	 */
	static const unsigned char sig_athlon [] = {
		0x55, 0x65, 0xa1
	};

	/* sig_gcc332 begins with the same 5 bytes as sig_gcc34, so it must be
	 * tested first (same order as the original code). The offset is read
	 * with memcpy rather than an int* cast: the immediate is not int-aligned
	 * and the cast was an unaligned, strict-aliasing-violating read. */
	if (tls_sig_match (code, sig_gcc332, (int) sizeof (sig_gcc332))) {
		memcpy (&offset, code + sizeof (sig_gcc332), sizeof (offset));
		return offset;
	}

	if (tls_sig_match (code, sig_gcc34, (int) sizeof (sig_gcc34))) {
		memcpy (&offset, code + sizeof (sig_gcc34), sizeof (offset));
		return offset;
	}

	if (tls_sig_match (code, sig_athlon, (int) sizeof (sig_athlon))) {
		memcpy (&offset, code + sizeof (sig_athlon), sizeof (offset));
		return offset;
	}

	return -1;
}
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Per-thread JIT initialization. On the first call (any thread) it probes
 * the TLS offsets of the lmf/appdomain/thread variables when MONO_NPTL is
 * set in the environment. When altstack support is compiled in, it also
 * records the thread's stack bounds in TLS and installs an alternate signal
 * stack so stack-overflow SIGSEGVs can be handled.
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
	pthread_t self = pthread_self();
	pthread_attr_t attr;
	void *staddr = NULL;
	size_t stsize = 0;
	struct sigaltstack sa;
#endif

	/* NOTE(review): not thread-safe; assumes the first caller finishes the
	 * probing before other threads run this — confirm against caller. */
	if (!tls_offset_inited) {
		tls_offset_inited = TRUE;
		if (getenv ("MONO_NPTL")) {
			/* Disassemble the accessors to find the gs-relative offsets */
			lmf_tls_offset = read_tls_offset_from_method (mono_get_lmf_addr);
			appdomain_tls_offset = read_tls_offset_from_method (mono_domain_get);
			thread_tls_offset = read_tls_offset_from_method (mono_thread_current);
		}
	}

#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK

	/* Determine stack boundaries */
	if (!mono_running_on_valgrind ()) {
#ifdef HAVE_PTHREAD_GETATTR_NP
		pthread_getattr_np( self, &attr );
#else
#ifdef HAVE_PTHREAD_ATTR_GET_NP
		pthread_attr_get_np( self, &attr );
#elif defined(sun)
		pthread_attr_init( &attr );
		pthread_attr_getstacksize( &attr, &stsize );
#else
#error "Not implemented"
#endif
#endif
#ifndef sun
		pthread_attr_getstack( &attr, &staddr, &stsize );
#endif
	}

	/* 
	 * staddr seems to be wrong for the main thread, so we keep the value in
	 * tls->end_of_stack
	 */
	tls->stack_size = stsize;

	/* Setup an alternate signal stack */
	tls->signal_stack = g_malloc (SIGNAL_STACK_SIZE);
	tls->signal_stack_size = SIGNAL_STACK_SIZE;

	sa.ss_sp = tls->signal_stack;
	sa.ss_size = SIGNAL_STACK_SIZE;
	/* NOTE(review): POSIX expects ss_flags to be 0 when installing a stack;
	 * SS_ONSTACK here is historically tolerated on Linux — verify on other
	 * platforms. */
	sa.ss_flags = SS_ONSTACK;
	sigaltstack (&sa, NULL);
#endif
}
4624
4625 void
4626 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4627 {
4628 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4629         struct sigaltstack sa;
4630
4631         sa.ss_sp = tls->signal_stack;
4632         sa.ss_size = SIGNAL_STACK_SIZE;
4633         sa.ss_flags = SS_DISABLE;
4634         sigaltstack  (&sa, NULL);
4635
4636         if (tls->signal_stack)
4637                 g_free (tls->signal_stack);
4638 #endif
4639 }
4640
4641 void
4642 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4643 {
4644
4645         /* add the this argument */
4646         if (this_reg != -1) {
4647                 MonoInst *this;
4648                 MONO_INST_NEW (cfg, this, OP_OUTARG);
4649                 this->type = this_type;
4650                 this->sreg1 = this_reg;
4651                 mono_bblock_add_inst (cfg->cbb, this);
4652         }
4653
4654         if (vt_reg != -1) {
4655                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
4656                 MonoInst *vtarg;
4657
4658                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4659                         /*
4660                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4661                          * the stack. Save the address here, so the call instruction can
4662                          * access it.
4663                          */
4664                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4665                         vtarg->inst_destbasereg = X86_ESP;
4666                         vtarg->inst_offset = inst->stack_usage;
4667                         vtarg->sreg1 = vt_reg;
4668                         mono_bblock_add_inst (cfg->cbb, vtarg);
4669                 }
4670                 else {
4671                         MonoInst *vtarg;
4672                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4673                         vtarg->type = STACK_MP;
4674                         vtarg->sreg1 = vt_reg;
4675                         mono_bblock_add_inst (cfg->cbb, vtarg);
4676                 }
4677
4678                 g_free (cinfo);
4679         }
4680 }
4681
4682
4683 MonoInst*
4684 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4685 {
4686         MonoInst *ins = NULL;
4687
4688         if (cmethod->klass == mono_defaults.math_class) {
4689                 if (strcmp (cmethod->name, "Sin") == 0) {
4690                         MONO_INST_NEW (cfg, ins, OP_SIN);
4691                         ins->inst_i0 = args [0];
4692                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4693                         MONO_INST_NEW (cfg, ins, OP_COS);
4694                         ins->inst_i0 = args [0];
4695                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4696                         MONO_INST_NEW (cfg, ins, OP_TAN);
4697                         ins->inst_i0 = args [0];
4698                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4699                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4700                         ins->inst_i0 = args [0];
4701                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4702                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4703                         ins->inst_i0 = args [0];
4704                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4705                         MONO_INST_NEW (cfg, ins, OP_ABS);
4706                         ins->inst_i0 = args [0];
4707                 }
4708 #if 0
4709                 /* OP_FREM is not IEEE compatible */
4710                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4711                         MONO_INST_NEW (cfg, ins, OP_FREM);
4712                         ins->inst_i0 = args [0];
4713                         ins->inst_i1 = args [1];
4714                 }
4715 #endif
4716         } else if(cmethod->klass->image == mono_defaults.corlib &&
4717                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4718                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4719
4720                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4721                         MonoInst *ins_iconst;
4722
4723                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4724                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4725                         ins_iconst->inst_c0 = 1;
4726
4727                         ins->inst_i0 = args [0];
4728                         ins->inst_i1 = ins_iconst;
4729                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4730                         MonoInst *ins_iconst;
4731
4732                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4733                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4734                         ins_iconst->inst_c0 = -1;
4735
4736                         ins->inst_i0 = args [0];
4737                         ins->inst_i1 = ins_iconst;
4738                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4739                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4740
4741                         ins->inst_i0 = args [0];
4742                         ins->inst_i1 = args [1];
4743                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4744                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
4745
4746                         ins->inst_i0 = args [0];
4747                         ins->inst_i1 = args [1];
4748                 }
4749         }
4750
4751         return ins;
4752 }
4753
4754
4755 gboolean
4756 mono_arch_print_tree (MonoInst *tree, int arity)
4757 {
4758         return 0;
4759 }
4760
4761 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4762 {
4763         MonoInst* ins;
4764         
4765         if (appdomain_tls_offset == -1)
4766                 return NULL;
4767         
4768         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4769         ins->inst_offset = appdomain_tls_offset;
4770         return ins;
4771 }
4772
4773 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4774 {
4775         MonoInst* ins;
4776         
4777         if (thread_tls_offset == -1)
4778                 return NULL;
4779         
4780         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4781         ins->inst_offset = thread_tls_offset;
4782         return ins;
4783 }
4784
4785 guint32
4786 mono_arch_get_patch_offset (guint8 *code)
4787 {
4788         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4789                 return 2;
4790         else if ((code [0] == 0xba))
4791                 return 1;
4792         else if ((code [0] == 0x68))
4793                 /* push IMM */
4794                 return 1;
4795         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4796                 /* push <OFFSET>(<REG>) */
4797                 return 2;
4798         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4799                 /* call *<OFFSET>(<REG>) */
4800                 return 2;
4801         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4802                 /* fldl <ADDR> */
4803                 return 2;
4804         else if ((code [0] == 0x58) && (code [1] == 0x05))
4805                 /* pop %eax; add <OFFSET>, %eax */
4806                 return 2;
4807         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4808                 /* pop <REG>; add <OFFSET>, <REG> */
4809                 return 3;
4810         else {
4811                 g_assert_not_reached ();
4812                 return -1;
4813         }
4814 }