Fri Dec 19 17:58:28 CET 2003 Paolo Molaro <lupus@ximian.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *
8  * (C) 2003 Ximian, Inc.
9  */
10 #include "mini.h"
11 #include <string.h>
12 #include <math.h>
13
14 #include <mono/metadata/appdomain.h>
15 #include <mono/metadata/debug-helpers.h>
16 #include <mono/metadata/profiler-private.h>
17 #include <mono/utils/mono-math.h>
18
19 #include "mini-x86.h"
20 #include "inssel.h"
21 #include "cpu-pentium.h"
22
/*
 * NOTE(review): not referenced in the visible part of this file. Presumably
 * the thread-local-storage offset of the LMF, with -1 meaning "not known
 * yet" - confirm against the code that reads or sets it.
 */
static gint lmf_tls_offset = -1;
24
25 const char*
26 mono_arch_regname (int reg) {
27         switch (reg) {
28         case X86_EAX: return "%eax";
29         case X86_EBX: return "%ebx";
30         case X86_ECX: return "%ecx";
31         case X86_EDX: return "%edx";
32         case X86_ESP: return "%esp";
33         case X86_EBP: return "%ebp";
34         case X86_EDI: return "%edi";
35         case X86_ESI: return "%esi";
36         }
37         return "unknown";
38 }
39
/*
 * Stack layout of one argument, as filled in by arch_get_argument_info ().
 * Entry 0 of the array describes the implicit arguments, entry k + 1
 * describes parameter k.
 */
typedef struct {
	guint16 size;   /* size in bytes taken on the stack */
	guint16 offset; /* offset of the value, added to the frame pointer by enter_method () */
	guint8  pad;    /* padding bytes computed for this slot */
} MonoJitArgumentInfo;
45
46 /*
47  * arch_get_argument_info:
48  * @csig:  a method signature
49  * @param_count: the number of parameters to consider
50  * @arg_info: an array to store the result infos
51  *
52  * Gathers information on parameters such as size, alignment and
53  * padding. arg_info should be large enought to hold param_count + 1 entries. 
54  *
55  * Returns the size of the activation frame.
56  */
57 static int
58 arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
59 {
60         int k, frame_size = 0;
61         int size, align, pad;
62         int offset = 8;
63
64         if (MONO_TYPE_ISSTRUCT (csig->ret)) { 
65                 frame_size += sizeof (gpointer);
66                 offset += 4;
67         }
68
69         arg_info [0].offset = offset;
70
71         if (csig->hasthis) {
72                 frame_size += sizeof (gpointer);
73                 offset += 4;
74         }
75
76         arg_info [0].size = frame_size;
77
78         for (k = 0; k < param_count; k++) {
79                 
80                 if (csig->pinvoke)
81                         size = mono_type_native_stack_size (csig->params [k], &align);
82                 else
83                         size = mono_type_stack_size (csig->params [k], &align);
84
85                 /* ignore alignment for now */
86                 align = 1;
87
88                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
89                 arg_info [k].pad = pad;
90                 frame_size += size;
91                 arg_info [k + 1].pad = 0;
92                 arg_info [k + 1].size = size;
93                 offset += pad;
94                 arg_info [k + 1].offset = offset;
95                 offset += size;
96         }
97
98         align = MONO_ARCH_FRAME_ALIGNMENT;
99         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
100         arg_info [k].pad = pad;
101
102         return frame_size;
103 }
104
/* current nesting depth of the traced calls */
static int indent_level = 0;

/*
 * Prints one ". " marker per current nesting level, then adjusts the
 * nesting level by @diff (the adjustment only affects later calls).
 */
static void indent (int diff) {
	int depth;

	for (depth = 0; depth < indent_level; depth++)
		printf (". ");

	indent_level += diff;
}
114
115 static gboolean enable_trace = TRUE;
116
117 static void
118 enter_method (MonoMethod *method, char *ebp)
119 {
120         int i, j;
121         MonoClass *class;
122         MonoObject *o;
123         MonoJitArgumentInfo *arg_info;
124         MonoMethodSignature *sig;
125         char *fname;
126
127         if (!enable_trace)
128                 return;
129
130         fname = mono_method_full_name (method, TRUE);
131         indent (1);
132         printf ("ENTER: %s(", fname);
133         g_free (fname);
134         
135         if (((int)ebp & (MONO_ARCH_FRAME_ALIGNMENT - 1)) != 0) {
136                 g_error ("unaligned stack detected (%p)", ebp);
137         }
138
139         sig = method->signature;
140
141         arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
142
143         arch_get_argument_info (sig, sig->param_count, arg_info);
144
145         if (MONO_TYPE_ISSTRUCT (method->signature->ret)) {
146                 g_assert (!method->signature->ret->byref);
147
148                 printf ("VALUERET:%p, ", *((gpointer *)(ebp + 8)));
149         }
150
151         if (method->signature->hasthis) {
152                 gpointer *this = (gpointer *)(ebp + arg_info [0].offset);
153                 if (method->klass->valuetype) {
154                         printf ("value:%p, ", *this);
155                 } else {
156                         o = *((MonoObject **)this);
157
158                         if (o) {
159                                 class = o->vtable->klass;
160
161                                 if (class == mono_defaults.string_class) {
162                                         printf ("this:[STRING:%p:%s], ", o, mono_string_to_utf8 ((MonoString *)o));
163                                 } else {
164                                         printf ("this:%p[%s.%s %s], ", o, class->name_space, class->name, o->vtable->domain->friendly_name);
165                                 }
166                         } else 
167                                 printf ("this:NULL, ");
168                 }
169         }
170
171         for (i = 0; i < method->signature->param_count; ++i) {
172                 gpointer *cpos = (gpointer *)(ebp + arg_info [i + 1].offset);
173                 int size = arg_info [i + 1].size;
174
175                 MonoType *type = method->signature->params [i];
176                 
177                 if (type->byref) {
178                         printf ("[BYREF:%p], ", *cpos); 
179                 } else switch (type->type) {
180                         
181                 case MONO_TYPE_I:
182                 case MONO_TYPE_U:
183                         printf ("%p, ", (gpointer)*((int *)(cpos)));
184                         break;
185                 case MONO_TYPE_BOOLEAN:
186                 case MONO_TYPE_CHAR:
187                 case MONO_TYPE_I1:
188                 case MONO_TYPE_U1:
189                 case MONO_TYPE_I2:
190                 case MONO_TYPE_U2:
191                 case MONO_TYPE_I4:
192                 case MONO_TYPE_U4:
193                         printf ("%d, ", *((int *)(cpos)));
194                         break;
195                 case MONO_TYPE_STRING: {
196                         MonoString *s = *((MonoString **)cpos);
197                         if (s) {
198                                 g_assert (((MonoObject *)s)->vtable->klass == mono_defaults.string_class);
199                                 printf ("[STRING:%p:%s], ", s, mono_string_to_utf8 (s));
200                         } else 
201                                 printf ("[STRING:null], ");
202                         break;
203                 }
204                 case MONO_TYPE_CLASS:
205                 case MONO_TYPE_OBJECT: {
206                         o = *((MonoObject **)cpos);
207                         if (o) {
208                                 class = o->vtable->klass;
209                     
210                                 if (class == mono_defaults.string_class) {
211                                         printf ("[STRING:%p:%s], ", o, mono_string_to_utf8 ((MonoString *)o));
212                                 } else if (class == mono_defaults.int32_class) {
213                                         printf ("[INT32:%p:%d], ", o, *(gint32 *)((char *)o + sizeof (MonoObject)));
214                                 } else
215                                         printf ("[%s.%s:%p], ", class->name_space, class->name, o);
216                         } else {
217                                 printf ("%p, ", *((gpointer *)(cpos)));                         
218                         }
219                         break;
220                 }
221                 case MONO_TYPE_PTR:
222                 case MONO_TYPE_FNPTR:
223                 case MONO_TYPE_ARRAY:
224                 case MONO_TYPE_SZARRAY:
225                         printf ("%p, ", *((gpointer *)(cpos)));
226                         break;
227                 case MONO_TYPE_I8:
228                 case MONO_TYPE_U8:
229                         printf ("0x%016llx, ", *((gint64 *)(cpos)));
230                         break;
231                 case MONO_TYPE_R4:
232                         printf ("%f, ", *((float *)(cpos)));
233                         break;
234                 case MONO_TYPE_R8:
235                         printf ("%f, ", *((double *)(cpos)));
236                         break;
237                 case MONO_TYPE_VALUETYPE: 
238                         printf ("[");
239                         for (j = 0; j < size; j++)
240                                 printf ("%02x,", *((guint8*)cpos +j));
241                         printf ("], ");
242                         break;
243                 default:
244                         printf ("XX, ");
245                 }
246         }
247
248         printf (")\n");
249 }
250
251 static void
252 leave_method (MonoMethod *method, ...)
253 {
254         MonoType *type;
255         char *fname;
256         va_list ap;
257
258         if (!enable_trace)
259                 return;
260
261         va_start(ap, method);
262
263         fname = mono_method_full_name (method, TRUE);
264         indent (-1);
265         printf ("LEAVE: %s", fname);
266         g_free (fname);
267
268         type = method->signature->ret;
269
270 handle_enum:
271         switch (type->type) {
272         case MONO_TYPE_VOID:
273                 break;
274         case MONO_TYPE_BOOLEAN: {
275                 int eax = va_arg (ap, int);
276                 if (eax)
277                         printf ("TRUE:%d", eax);
278                 else 
279                         printf ("FALSE");
280                         
281                 break;
282         }
283         case MONO_TYPE_CHAR:
284         case MONO_TYPE_I1:
285         case MONO_TYPE_U1:
286         case MONO_TYPE_I2:
287         case MONO_TYPE_U2:
288         case MONO_TYPE_I4:
289         case MONO_TYPE_U4:
290         case MONO_TYPE_I:
291         case MONO_TYPE_U: {
292                 int eax = va_arg (ap, int);
293                 printf ("EAX=%d", eax);
294                 break;
295         }
296         case MONO_TYPE_STRING: {
297                 MonoString *s = va_arg (ap, MonoString *);
298 ;
299                 if (s) {
300                         g_assert (((MonoObject *)s)->vtable->klass == mono_defaults.string_class);
301                         printf ("[STRING:%p:%s]", s, mono_string_to_utf8 (s));
302                 } else 
303                         printf ("[STRING:null], ");
304                 break;
305         }
306         case MONO_TYPE_CLASS: 
307         case MONO_TYPE_OBJECT: {
308                 MonoObject *o = va_arg (ap, MonoObject *);
309
310                 if (o) {
311                         if (o->vtable->klass == mono_defaults.boolean_class) {
312                                 printf ("[BOOLEAN:%p:%d]", o, *((guint8 *)o + sizeof (MonoObject)));            
313                         } else if  (o->vtable->klass == mono_defaults.int32_class) {
314                                 printf ("[INT32:%p:%d]", o, *((gint32 *)((char *)o + sizeof (MonoObject))));    
315                         } else if  (o->vtable->klass == mono_defaults.int64_class) {
316                                 printf ("[INT64:%p:%lld]", o, *((gint64 *)((char *)o + sizeof (MonoObject))));  
317                         } else
318                                 printf ("[%s.%s:%p]", o->vtable->klass->name_space, o->vtable->klass->name, o);
319                 } else
320                         printf ("[OBJECT:%p]", o);
321                
322                 break;
323         }
324         case MONO_TYPE_PTR:
325         case MONO_TYPE_FNPTR:
326         case MONO_TYPE_ARRAY:
327         case MONO_TYPE_SZARRAY: {
328                 gpointer p = va_arg (ap, gpointer);
329                 printf ("EAX=%p", p);
330                 break;
331         }
332         case MONO_TYPE_I8: {
333                 gint64 l =  va_arg (ap, gint64);
334                 printf ("EAX/EDX=0x%16llx", l);
335                 break;
336         }
337         case MONO_TYPE_U8: {
338                 gint64 l =  va_arg (ap, gint64);
339                 printf ("EAX/EDX=0x%16llx", l);
340                 break;
341         }
342         case MONO_TYPE_R8: {
343                 double f = va_arg (ap, double);
344                 printf ("FP=%f\n", f);
345                 break;
346         }
347         case MONO_TYPE_VALUETYPE: 
348                 if (type->data.klass->enumtype) {
349                         type = type->data.klass->enum_basetype;
350                         goto handle_enum;
351                 } else {
352                         guint8 *p = va_arg (ap, gpointer);
353                         int j, size, align;
354                         size = mono_type_size (type, &align);
355                         printf ("[");
356                         for (j = 0; p && j < size; j++)
357                                 printf ("%02x,", p [j]);
358                         printf ("]");
359                 }
360                 break;
361         default:
362                 printf ("(unknown return type %x)", method->signature->ret->type);
363         }
364
365         printf ("\n");
366 }
367
/*
 * Hand assembled machine code that is called through the CpuidFunc
 * signature (id, p_eax, p_ebx, p_ecx, p_edx): it loads the first stack
 * argument into %eax, executes cpuid and stores the four result registers
 * through the four pointer arguments. %ebx is pushed on entry and popped
 * before returning.
 */
static const guchar cpuid_impl [] = {
	0x55,                           /* push   %ebp */
	0x89, 0xe5,                     /* mov    %esp,%ebp */
	0x53,                           /* push   %ebx */
	0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                     /* cpuid   */
	0x50,                           /* push   %eax */
	0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
	0x89, 0x18,                     /* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
	0x89, 0x08,                     /* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
	0x89, 0x10,                     /* mov    %edx,(%eax) */
	0x58,                           /* pop    %eax */
	0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
	0x89, 0x02,                     /* mov    %eax,(%edx) */
	0x5b,                           /* pop    %ebx */
	0xc9,                           /* leave   */
	0xc3,                           /* ret     */
};
388
389 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
390
391 static int 
392 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
393 {
394         int have_cpuid = 0;
395         __asm__  __volatile__ (
396                 "pushfl\n"
397                 "popl %%eax\n"
398                 "movl %%eax, %%edx\n"
399                 "xorl $0x200000, %%eax\n"
400                 "pushl %%eax\n"
401                 "popfl\n"
402                 "pushfl\n"
403                 "popl %%eax\n"
404                 "xorl %%edx, %%eax\n"
405                 "andl $0x200000, %%eax\n"
406                 "movl %%eax, %0"
407                 : "=r" (have_cpuid)
408                 :
409                 : "%eax", "%edx"
410         );
411
412         if (have_cpuid) {
413                 CpuidFunc func = (CpuidFunc)cpuid_impl;
414                 func (id, p_eax, p_ebx, p_ecx, p_edx);
415                 /*
416                  * We use this approach because of issues with gcc and pic code, see:
417                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
418                 __asm__ __volatile__ ("cpuid"
419                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
420                         : "a" (id));
421                 */
422                 return 1;
423         }
424         return 0;
425 }
426
/*
 * Initialize the cpu to execute managed code.
 *
 * Reads the FPU control word, replaces the bits selected by
 * X86_FPCW_PRECC_MASK with X86_FPCW_PREC_DOUBLE and loads the result back.
 */
void
mono_arch_cpu_init (void)
{
	guint16 fpcw;

	/* spec compliance requires running with double precision */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	/* clear the precision field, then select double precision */
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	/* the control word is stored once more; the value read back is not used */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));

}
443
444 /*
445  * This function returns the optimizations supported on this cpu.
446  */
447 guint32
448 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
449 {
450         int eax, ebx, ecx, edx;
451         guint32 opts = 0;
452         
453         *exclude_mask = 0;
454         /* Feature Flags function, flags returned in EDX. */
455         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
456                 if (edx & (1 << 15)) {
457                         opts |= MONO_OPT_CMOV;
458                         if (edx & 1)
459                                 opts |= MONO_OPT_FCMOV;
460                         else
461                                 *exclude_mask |= MONO_OPT_FCMOV;
462                 } else
463                         *exclude_mask |= MONO_OPT_CMOV;
464         }
465         return opts;
466 }
467
468 static gboolean
469 is_regsize_var (MonoType *t) {
470         if (t->byref)
471                 return TRUE;
472         switch (t->type) {
473         case MONO_TYPE_I4:
474         case MONO_TYPE_U4:
475         case MONO_TYPE_I:
476         case MONO_TYPE_U:
477                 return TRUE;
478         case MONO_TYPE_OBJECT:
479         case MONO_TYPE_STRING:
480         case MONO_TYPE_CLASS:
481         case MONO_TYPE_SZARRAY:
482         case MONO_TYPE_ARRAY:
483                 return TRUE;
484         case MONO_TYPE_VALUETYPE:
485                 if (t->data.klass->enumtype)
486                         return is_regsize_var (t->data.klass->enum_basetype);
487                 return FALSE;
488         }
489         return FALSE;
490 }
491
492 GList *
493 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
494 {
495         GList *vars = NULL;
496         int i;
497
498         for (i = 0; i < cfg->num_varinfo; i++) {
499                 MonoInst *ins = cfg->varinfo [i];
500                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
501
502                 /* unused vars */
503                 if (vmv->range.first_use.abs_pos > vmv->range.last_use.abs_pos)
504                         continue;
505
506                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
507                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
508                         continue;
509
510                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
511                  * 8bit quantities in caller saved registers on x86 */
512                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
513                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
514                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
515                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
516                         g_assert (i == vmv->idx);
517                         vars = g_list_prepend (vars, vmv);
518                 }
519         }
520
521         vars = mono_varlist_sort (cfg, vars, 0);
522
523         return vars;
524 }
525
526 GList *
527 mono_arch_get_global_int_regs (MonoCompile *cfg)
528 {
529         GList *regs = NULL;
530
531         /* we can use 3 registers for global allocation */
532         regs = g_list_prepend (regs, (gpointer)X86_EBX);
533         regs = g_list_prepend (regs, (gpointer)X86_ESI);
534         regs = g_list_prepend (regs, (gpointer)X86_EDI);
535
536         return regs;
537 }
538  
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 *
 * Incoming arguments get positive offsets from EBP starting at 8, locals
 * get negative offsets from EBP. On return m->stack_offset holds the
 * negated size of the locals area, rounded up to MONO_ARCH_FRAME_ALIGNMENT.
 */
void
mono_arch_allocate_vars (MonoCompile *m)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	int i, offset, size, align, curinst;

	/* NOTE(review): header is assigned here but not read anywhere below */
	header = ((MonoMethodNormal *)m->method)->header;

	sig = m->method->signature;

	/* the first incoming stack slot is at ebp + 8 */
	offset = 8;
	curinst = 0;
	if (MONO_TYPE_ISSTRUCT (sig->ret)) {
		/* valuetype return: the first slot is assigned to the return variable */
		m->ret->opcode = OP_REGOFFSET;
		m->ret->inst_basereg = X86_EBP;
		m->ret->inst_offset = offset;
		offset += sizeof (gpointer);
	} else {
		/* FIXME: handle long and FP values */
		switch (sig->ret->type) {
		case MONO_TYPE_VOID:
			break;
		default:
			/* every other return type is mapped to EAX */
			m->ret->opcode = OP_REGVAR;
			m->ret->inst_c0 = X86_EAX;
			break;
		}
	}
	if (sig->hasthis) {
		/* varinfo [0] is the this argument */
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		offset += sizeof (gpointer);
		curinst++;
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		/* remember where the signature cookie slot is located */
		m->sig_cookie = offset;
		offset += sizeof (gpointer);
	}

	for (i = 0; i < sig->param_count; ++i) {
		inst = m->varinfo [curinst];
		/* arguments already allocated to a register keep their opcode */
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		size = mono_type_size (sig->params [i], &align);
		/* each argument takes a multiple of 4 bytes */
		size += 4 - 1;
		size &= ~(4 - 1);
		offset += size;
		curinst++;
	}

	/* from here on offset counts the bytes used below ebp */
	offset = 0;

	/* reserve space to save LMF and caller saved registers */

	if (m->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		/* 4 bytes for each of ebx, edi and esi that the method uses */
		if (m->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	/* the remaining varinfo entries are the locals */
	for (i = curinst; i < m->num_varinfo; ++i) {
		inst = m->varinfo [i];

		if ((inst->flags & MONO_INST_IS_DEAD) || inst->opcode == OP_REGVAR)
			continue;

		/* inst->unused indicates native sized value types, this is used by the
		* pinvoke wrappers when they call functions returning structure */
		if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
			size = mono_class_native_size (inst->inst_vtype->data.klass, &align);
		else
			size = mono_type_size (inst->inst_vtype, &align);

		/* grow the area by size, round up to align, place the local at ebp - offset */
		offset += size;
		offset += align - 1;
		offset &= ~(align - 1);
		inst->opcode = OP_REGOFFSET;
		inst->inst_basereg = X86_EBP;
		inst->inst_offset = -offset;
		//g_print ("allocating local %d to %d\n", i, -offset);
	}
	/* round the locals area up to the frame alignment */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	/* change sign? */
	m->stack_offset = -offset;
}
650
651 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
652  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
653  */
654
/* 
 * take the arguments and generate the arch-specific
 * instructions to properly call the function in call.
 * This includes pushing, moving arguments to the right register
 * etc.
 * Issue: who does the spilling if needed, and when?
 *
 * One OP_OUTARG* instruction is prepended to call->out_args for every
 * argument (except the first argument of a virtual call) and the number of
 * stack bytes taken by the arguments is stored in call->stack_usage.
 * Returns call.
 */
MonoCallInst*
mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n, stack_size, type;
	MonoType *ptype;

	stack_size = 0;
	/* add the vararg cookie before the non-implicit args */
	if (call->signature->call_convention == MONO_CALL_VARARG) {
		MonoInst *sig_arg;
		/* FIXME: Add support for signature tokens to AOT */
		cfg->disable_aot = TRUE;
		/* the cookie is the signature pointer, passed as a constant argument */
		MONO_INST_NEW (cfg, arg, OP_OUTARG);
		MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
		sig_arg->inst_p0 = call->signature;
		arg->inst_left = sig_arg;
		arg->type = STACK_PTR;
		/* prepend, so they get reversed */
		arg->next = call->out_args;
		call->out_args = arg;
		stack_size += sizeof (gpointer);
	}
	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	/* account for the hidden pointer to the returned valuetype */
	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
		stack_size += sizeof (gpointer);
	for (i = 0; i < n; ++i) {
		if (is_virtual && i == 0) {
			/* the argument will be attached to the call instruction */
			in = call->args [i];
			stack_size += 4;
		} else {
			MONO_INST_NEW (cfg, arg, OP_OUTARG);
			in = call->args [i];
			arg->cil_code = in->cil_code;
			arg->inst_left = in;
			arg->type = in->type;
			/* prepend, so they get reversed */
			arg->next = call->out_args;
			call->out_args = arg;
			if (i >= sig->hasthis) {
				ptype = sig->params [i - sig->hasthis];
				/* byref arguments are passed like MONO_TYPE_U */
				if (ptype->byref)
					type = MONO_TYPE_U;
				else
					type = ptype->type;
handle_enum:
				/* FIXME: validate arguments... */
				switch (type) {
				case MONO_TYPE_I:
				case MONO_TYPE_U:
				case MONO_TYPE_BOOLEAN:
				case MONO_TYPE_CHAR:
				case MONO_TYPE_I1:
				case MONO_TYPE_U1:
				case MONO_TYPE_I2:
				case MONO_TYPE_U2:
				case MONO_TYPE_I4:
				case MONO_TYPE_U4:
				case MONO_TYPE_STRING:
				case MONO_TYPE_CLASS:
				case MONO_TYPE_OBJECT:
				case MONO_TYPE_PTR:
				case MONO_TYPE_FNPTR:
				case MONO_TYPE_ARRAY:
				case MONO_TYPE_SZARRAY:
					stack_size += 4;
					break;
				case MONO_TYPE_I8:
				case MONO_TYPE_U8:
					stack_size += 8;
					break;
				case MONO_TYPE_R4:
					stack_size += 4;
					arg->opcode = OP_OUTARG_R4;
					break;
				case MONO_TYPE_R8:
					stack_size += 8;
					arg->opcode = OP_OUTARG_R8;
					break;
				case MONO_TYPE_VALUETYPE:
					if (MONO_TYPE_ISSTRUCT (ptype)) {
						int size;
						/* pinvoke calls use the native stack size of the valuetype */
						if (sig->pinvoke) 
							size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
						else 
							size = mono_type_stack_size (&in->klass->byval_arg, NULL);

						stack_size += size;
						arg->opcode = OP_OUTARG_VT;
						arg->klass = in->klass;
						arg->unused = sig->pinvoke;
						arg->inst_imm = size; 
					} else {
						/* not a struct: handle it as the enum base type */
						type = ptype->data.klass->enum_basetype->type;
						goto handle_enum;
					}
					break;
				case MONO_TYPE_TYPEDBYREF:
					stack_size += sizeof (MonoTypedRef);
					arg->opcode = OP_OUTARG_VT;
					arg->klass = in->klass;
					arg->unused = sig->pinvoke;
					arg->inst_imm = sizeof (MonoTypedRef); 
					break;
				case MONO_TYPE_GENERICINST:
					/* dispatch again on the type of the generic type definition */
					type = ptype->data.generic_inst->generic_type->type;
					goto handle_enum;

				default:
					g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
				}
			} else {
				/* the this argument */
				stack_size += 4;
			}
		}
	}
	/* if the function returns a struct, the called method already does a ret $0x4 */
	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
		stack_size -= 4;
	call->stack_usage = stack_size;
	/* 
	 * should set more info in call, such as the stack space
	 * used by the args that needs to be added back to esp
	 */

	return call;
}
793
794 /*
795  * Allow tracing to work with this interface (with an optional argument)
796  */
797
798 /*
799  * This may be needed on some archs or for debugging support.
800  */
801 void
802 mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
803 {
804         /* no stack room needed now (may be needed for FASTCALL-trace support) */
805         *stack = 0;
806         /* split prolog-epilog requirements? */
807         *code = 50; /* max bytes needed: check this number */
808 }
809
810 void*
811 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
812 {
813         guchar *code = p;
814
815         /* if some args are passed in registers, we need to save them here */
816         x86_push_reg (code, X86_EBP);
817         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
818         x86_push_imm (code, cfg->method);
819         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
820         x86_call_code (code, 0);
821         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
822
823         return code;
824 }
825
/*
 * Strategies used by mono_arch_instrument_epilog to keep the return
 * value of the instrumented method intact across the call to the
 * instrumentation function.
 */
826 enum {
827         SAVE_NONE,      /* nothing is saved or restored */
828         SAVE_STRUCT,    /* nothing is saved; with arguments enabled the word at 8(%ebp) is passed along */
829         SAVE_EAX,       /* EAX is pushed before the call and popped after it */
830         SAVE_EAX_EDX,   /* the EDX:EAX pair is pushed before the call and popped after it */
831         SAVE_FP         /* the top of the FP stack is stored in 8 bytes of stack and reloaded after the call */
832 };
833
834 void*
835 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
836 {
837         guchar *code = p;
838         int arg_size = 0, save_mode = SAVE_NONE;
839         MonoMethod *method = cfg->method;
840         int rtype = method->signature->ret->type;
841         
842 handle_enum:
843         switch (rtype) {
844         case MONO_TYPE_VOID:
845                 /* special case string .ctor icall */
846                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
847                         save_mode = SAVE_EAX;
848                 else
849                         save_mode = SAVE_NONE;
850                 break;
851         case MONO_TYPE_I8:
852         case MONO_TYPE_U8:
853                 save_mode = SAVE_EAX_EDX;
854                 break;
855         case MONO_TYPE_R4:
856         case MONO_TYPE_R8:
857                 save_mode = SAVE_FP;
858                 break;
859         case MONO_TYPE_VALUETYPE:
860                 if (method->signature->ret->data.klass->enumtype) {
861                         rtype = method->signature->ret->data.klass->enum_basetype->type;
862                         goto handle_enum;
863                 }
864                 save_mode = SAVE_STRUCT;
865                 break;
866         default:
867                 save_mode = SAVE_EAX;
868                 break;
869         }
870
871         switch (save_mode) {
872         case SAVE_EAX_EDX:
873                 x86_push_reg (code, X86_EDX);
874                 x86_push_reg (code, X86_EAX);
875                 if (enable_arguments) {
876                         x86_push_reg (code, X86_EDX);
877                         x86_push_reg (code, X86_EAX);
878                         arg_size = 8;
879                 }
880                 break;
881         case SAVE_EAX:
882                 x86_push_reg (code, X86_EAX);
883                 if (enable_arguments) {
884                         x86_push_reg (code, X86_EAX);
885                         arg_size = 4;
886                 }
887                 break;
888         case SAVE_FP:
889                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
890                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
891                 if (enable_arguments) {
892                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
893                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
894                         arg_size = 8;
895                 }
896                 break;
897         case SAVE_STRUCT:
898                 if (enable_arguments) {
899                         x86_push_membase (code, X86_EBP, 8);
900                         arg_size = 4;
901                 }
902                 break;
903         case SAVE_NONE:
904         default:
905                 break;
906         }
907
908
909         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
910         x86_push_imm (code, method);
911         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
912         x86_call_code (code, 0);
913         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
914
915         switch (save_mode) {
916         case SAVE_EAX_EDX:
917                 x86_pop_reg (code, X86_EAX);
918                 x86_pop_reg (code, X86_EDX);
919                 break;
920         case SAVE_EAX:
921                 x86_pop_reg (code, X86_EAX);
922                 break;
923         case SAVE_FP:
924                 x86_fld_membase (code, X86_ESP, 0, TRUE);
925                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
926                 break;
927         case SAVE_NONE:
928         default:
929                 break;
930         }
931
932         return code;
933 }
934
/*
 * EMIT_COND_BRANCH:
 * Emits the conditional branch described by ins. The expansion uses
 * the names code, cfg and cpos from the expansion site.
 *
 * - With MONO_INST_BRLABEL set the target is the label ins->inst_i0,
 *   otherwise it is the basic block ins->inst_true_bb.
 * - If the target offset (inst_c0 / native_offset) is already non-zero
 *   the branch goes straight to cfg->native_code + offset.
 * - Otherwise a LABEL/BB patch is registered and a branch with a zero
 *   placeholder target is emitted: 32 bit wide, or, for basic block
 *   targets only, 8 bit wide when MONO_OPT_BRANCH is enabled and
 *   max_offset - cpos fits in an imm8.
 *
 * Note that the macro expands to a bare if/else statement, it is not
 * wrapped in do { } while (0).
 */
935 #define EMIT_COND_BRANCH(ins,cond,sign) \
936 if (ins->flags & MONO_INST_BRLABEL) { \
937         if (ins->inst_i0->inst_c0) { \
938                 x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
939         } else { \
940                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
941                 x86_branch32 (code, cond, 0, sign); \
942         } \
943 } else { \
944         if (ins->inst_true_bb->native_offset) { \
945                 x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
946         } else { \
947                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
948                 if ((cfg->opt & MONO_OPT_BRANCH) && \
949                     x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
950                         x86_branch8 (code, cond, 0, sign); \
951                 else \
952                         x86_branch32 (code, cond, 0, sign); \
953         } \
954 }
955
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 * Registers an EXC patch carrying exc_name at the current code offset
 * and emits a 32 bit conditional branch with a zero placeholder target.
 * Uses the names cfg and code from the expansion site.
 *
 * NOTE(review): the definition ends in "while (0);", so the expansion
 * already contains a terminating semicolon; an invocation followed by
 * ';' yields an extra empty statement, which breaks an un-braced
 * if/else around it.
 */
956 /* emit the branch that raises the exception when the condition holds */
957 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
958         do {                                                        \
959                 mono_add_patch_info (cfg, code - cfg->native_code,   \
960                                     MONO_PATCH_INFO_EXC, exc_name);  \
961                 x86_branch32 (code, cond, 0, signed);               \
962         } while (0); 
963
/*
 * EMIT_FPCOMPARE:
 * Emits x86_fcompp followed by x86_fnstsw into the given code pointer.
 *
 * NOTE(review): as with EMIT_COND_SYSTEM_EXCEPTION the definition ends
 * in "while (0);", so the expansion carries its own semicolon.
 */
964 #define EMIT_FPCOMPARE(code) do { \
965         x86_fcompp (code); \
966         x86_fnstsw (code); \
967 } while (0); 
968
969 static void
970 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
971 {
972         MonoInst *ins, *last_ins = NULL;
973         ins = bb->code;
974
975         while (ins) {
976
977                 switch (ins->opcode) {
978                 case OP_ICONST:
979                         /* reg = 0 -> XOR (reg, reg) */
980                         /* XOR sets cflags on x86, so we cant do it always */
981                         if (ins->inst_c0 == 0 && ins->next &&
982                             (ins->next->opcode == CEE_BR)) { 
983                                 ins->opcode = CEE_XOR;
984                                 ins->sreg1 = ins->dreg;
985                                 ins->sreg2 = ins->dreg;
986                         }
987                         break;
988                 case OP_MUL_IMM: 
989                         /* remove unnecessary multiplication with 1 */
990                         if (ins->inst_imm == 1) {
991                                 if (ins->dreg != ins->sreg1) {
992                                         ins->opcode = OP_MOVE;
993                                 } else {
994                                         last_ins->next = ins->next;                             
995                                         ins = ins->next;                                
996                                         continue;
997                                 }
998                         }
999                         break;
1000                 case OP_COMPARE_IMM:
1001                         /* OP_COMPARE_IMM (reg, 0) --> OP_X86_TEST_NULL (reg) */
1002                         if (ins->inst_imm == 0 && ins->next &&
1003                             (ins->next->opcode == CEE_BEQ || ins->next->opcode == CEE_BNE_UN ||
1004                              ins->next->opcode == OP_CEQ)) {
1005                                 ins->opcode = OP_X86_TEST_NULL;
1006                         }     
1007                         break;
1008                 case OP_LOAD_MEMBASE:
1009                 case OP_LOADI4_MEMBASE:
1010                         /* 
1011                          * OP_STORE_MEMBASE_REG reg, offset(basereg) 
1012                          * OP_LOAD_MEMBASE offset(basereg), reg
1013                          */
1014                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1015                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1016                             ins->inst_basereg == last_ins->inst_destbasereg &&
1017                             ins->inst_offset == last_ins->inst_offset) {
1018                                 if (ins->dreg == last_ins->sreg1) {
1019                                         last_ins->next = ins->next;                             
1020                                         ins = ins->next;                                
1021                                         continue;
1022                                 } else {
1023                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1024                                         ins->opcode = OP_MOVE;
1025                                         ins->sreg1 = last_ins->sreg1;
1026                                 }
1027
1028                         /* 
1029                          * Note: reg1 must be different from the basereg in the second load
1030                          * OP_LOAD_MEMBASE offset(basereg), reg1
1031                          * OP_LOAD_MEMBASE offset(basereg), reg2
1032                          * -->
1033                          * OP_LOAD_MEMBASE offset(basereg), reg1
1034                          * OP_MOVE reg1, reg2
1035                          */
1036                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1037                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1038                               ins->inst_basereg != last_ins->dreg &&
1039                               ins->inst_basereg == last_ins->inst_basereg &&
1040                               ins->inst_offset == last_ins->inst_offset) {
1041
1042                                 if (ins->dreg == last_ins->dreg) {
1043                                         last_ins->next = ins->next;                             
1044                                         ins = ins->next;                                
1045                                         continue;
1046                                 } else {
1047                                         ins->opcode = OP_MOVE;
1048                                         ins->sreg1 = last_ins->dreg;
1049                                 }
1050
1051                                 //g_assert_not_reached ();
1052
1053 #if 0
1054                         /* 
1055                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1056                          * OP_LOAD_MEMBASE offset(basereg), reg
1057                          * -->
1058                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1059                          * OP_ICONST reg, imm
1060                          */
1061                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1062                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1063                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1064                                    ins->inst_offset == last_ins->inst_offset) {
1065                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1066                                 ins->opcode = OP_ICONST;
1067                                 ins->inst_c0 = last_ins->inst_imm;
1068                                 g_assert_not_reached (); // check this rule
1069 #endif
1070                         }
1071                         break;
1072                 case OP_LOADU1_MEMBASE:
1073                 case OP_LOADI1_MEMBASE:
1074                   /*
1075                    * FIXME: Missing explanation
1076                    */
1077                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1078                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1079                                         ins->inst_offset == last_ins->inst_offset) {
1080                                 if (ins->dreg == last_ins->sreg1) {
1081                                         last_ins->next = ins->next;                             
1082                                         ins = ins->next;                                
1083                                         continue;
1084                                 } else {
1085                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1086                                         ins->opcode = OP_MOVE;
1087                                         ins->sreg1 = last_ins->sreg1;
1088                                 }
1089                         }
1090                         break;
1091                 case OP_LOADU2_MEMBASE:
1092                 case OP_LOADI2_MEMBASE:
1093                   /*
1094                    * FIXME: Missing explanation
1095                    */
1096                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1097                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1098                                         ins->inst_offset == last_ins->inst_offset) {
1099                                 if (ins->dreg == last_ins->sreg1) {
1100                                         last_ins->next = ins->next;                             
1101                                         ins = ins->next;                                
1102                                         continue;
1103                                 } else {
1104                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1105                                         ins->opcode = OP_MOVE;
1106                                         ins->sreg1 = last_ins->sreg1;
1107                                 }
1108                         }
1109                         break;
1110                 case CEE_CONV_I4:
1111                 case CEE_CONV_U4:
1112                 case OP_MOVE:
1113                         /* 
1114                          * OP_MOVE reg, reg 
1115                          */
1116                         if (ins->dreg == ins->sreg1) {
1117                                 if (last_ins)
1118                                         last_ins->next = ins->next;                             
1119                                 ins = ins->next;
1120                                 continue;
1121                         }
1122                         /* 
1123                          * OP_MOVE sreg, dreg 
1124                          * OP_MOVE dreg, sreg
1125                          */
1126                         if (last_ins && last_ins->opcode == OP_MOVE &&
1127                             ins->sreg1 == last_ins->dreg &&
1128                             ins->dreg == last_ins->sreg1) {
1129                                 last_ins->next = ins->next;                             
1130                                 ins = ins->next;                                
1131                                 continue;
1132                         }
1133                         break;
1134                 }
1135                 last_ins = ins;
1136                 ins = ins->next;
1137         }
1138         bb->last_ins = last_ins;
1139 }
1140
/*
 * Table of x86 condition codes: 14 entries, two rows of five
 * comparison conditions (the second row repeats GE/GT/LE/LT after NE)
 * followed by overflow / no overflow / carry / no carry.
 * NOTE(review): presumably indexed by a branch opcode relative to the
 * first branch opcode, the repeated row serving the unsigned variants -
 * confirm at the use sites.
 */
1141 static const int 
1142 branch_cc_table [] = {
1143         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1144         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1145         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1146 };
1147
1148 /*
1149  * returns the offset used by spillvar. It allocates a new
1150  * spill variable if necessary. 
1151  */
1152 static int
1153 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1154 {
1155         MonoSpillInfo **si, *info;
1156         int i = 0;
1157
1158         si = &cfg->spill_info; 
1159         
1160         while (i <= spillvar) {
1161
1162                 if (!*si) {
1163                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1164                         info->next = NULL;
1165                         cfg->stack_offset -= sizeof (gpointer);
1166                         info->offset = cfg->stack_offset;
1167                 }
1168
1169                 if (i == spillvar)
1170                         return (*si)->offset;
1171
1172                 i++;
1173                 si = &(*si)->next;
1174         }
1175
1176         g_assert_not_reached ();
1177         return 0;
1178 }
1179
/*
 * DEBUG(a): executes statement a only when cfg->verbose_level is above 1.
 * Needs a variable named cfg in scope. The expansion is an un-braced
 * "if", so an "else" directly following an invocation binds to it.
 */
1180 #define DEBUG(a) if (cfg->verbose_level > 1) a
1181 //#define DEBUG(a)
/*
 * reg_is_freeable(r): true when r is in the range 0..7 and
 * X86_IS_CALLEE accepts it. r is evaluated more than once.
 */
1182 #define reg_is_freeable(r) ((r) >= 0 && (r) <= 7 && X86_IS_CALLEE ((r)))
1183
/*
 * Per register liveness record filled in by the forward pass of
 * mono_arch_local_regalloc. All fields hold 1-based instruction
 * indexes inside the basic block, 0 meaning "not set".
 */
1184 typedef struct {
1185         int born_in;    /* first instruction having the register as destination */
1186         int killed_in;  /* latest instruction writing it as a destination other than a 'b' (base register) one */
1187         int last_use;   /* latest instruction referencing it, as source or destination */
1188         int prev_use;   /* value last_use had before its latest update */
1189 } RegTrack;
1190
/*
 * Instruction descriptors, indexed by opcode. Each descriptor is a
 * string whose characters at MONO_INST_DEST, MONO_INST_SRC1,
 * MONO_INST_SRC2 and MONO_INST_CLOB describe the destination, the two
 * sources and the clobber of the opcode (a zero character: not used).
 */
1191 static const char*const * ins_spec = pentium_desc;
1192
1193 static void
1194 print_ins (int i, MonoInst *ins)
1195 {
1196         const char *spec = ins_spec [ins->opcode];
1197         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1198         if (spec [MONO_INST_DEST]) {
1199                 if (ins->dreg >= MONO_MAX_IREGS)
1200                         g_print (" R%d <-", ins->dreg);
1201                 else
1202                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1203         }
1204         if (spec [MONO_INST_SRC1]) {
1205                 if (ins->sreg1 >= MONO_MAX_IREGS)
1206                         g_print (" R%d", ins->sreg1);
1207                 else
1208                         g_print (" %s", mono_arch_regname (ins->sreg1));
1209         }
1210         if (spec [MONO_INST_SRC2]) {
1211                 if (ins->sreg2 >= MONO_MAX_IREGS)
1212                         g_print (" R%d", ins->sreg2);
1213                 else
1214                         g_print (" %s", mono_arch_regname (ins->sreg2));
1215         }
1216         if (spec [MONO_INST_CLOB])
1217                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1218         g_print ("\n");
1219 }
1220
1221 static void
1222 print_regtrack (RegTrack *t, int num)
1223 {
1224         int i;
1225         char buf [32];
1226         const char *r;
1227         
1228         for (i = 0; i < num; ++i) {
1229                 if (!t [i].born_in)
1230                         continue;
1231                 if (i >= MONO_MAX_IREGS) {
1232                         g_snprintf (buf, sizeof(buf), "R%d", i);
1233                         r = buf;
1234                 } else
1235                         r = mono_arch_regname (i);
1236                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1237         }
1238 }
1239
/*
 * Node of a doubly linked list of instructions. The local register
 * allocator builds such a list by prepending every instruction of a
 * basic block, so walking it through next visits the instructions in
 * reverse order.
 */
1240 typedef struct InstList InstList;
1241
1242 struct InstList {
1243         InstList *prev; /* node prepended after this one, NULL at the head */
1244         InstList *next; /* node that was the head when this one was prepended */
1245         MonoInst *data; /* the instruction carried by the node */
1246 };
1247
1248 static inline InstList*
1249 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1250 {
1251         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1252         item->data = data;
1253         item->prev = NULL;
1254         item->next = list;
1255         if (list)
1256                 list->prev = item;
1257         return item;
1258 }
1259
1260 /*
1261  * Force the spilling of the variable in the symbolic register 'reg'.
1262  */
1263 static int
1264 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1265 {
1266         MonoInst *load;
1267         int i, sel, spill;
1268         
1269         sel = cfg->rs->iassign [reg];
1270         /*i = cfg->rs->isymbolic [sel];
1271         g_assert (i == reg);*/
1272         i = reg;
1273         spill = ++cfg->spill_count;
1274         cfg->rs->iassign [i] = -spill - 1;
1275         mono_regstate_free_int (cfg->rs, sel);
1276         /* we need to create a spill var and insert a load to sel after the current instruction */
1277         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1278         load->dreg = sel;
1279         load->inst_basereg = X86_EBP;
1280         load->inst_offset = mono_spillvar_offset (cfg, spill);
1281         if (item->prev) {
1282                 while (ins->next != item->prev->data)
1283                         ins = ins->next;
1284         }
1285         load->next = ins->next;
1286         ins->next = load;
1287         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1288         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1289         g_assert (i == sel);
1290
1291         return sel;
1292 }
1293
1294 static int
1295 get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
1296 {
1297         MonoInst *load;
1298         int i, sel, spill;
1299
1300         DEBUG (g_print ("start regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
1301         /* exclude the registers in the current instruction */
1302         if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
1303                 if (ins->sreg1 >= MONO_MAX_IREGS)
1304                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
1305                 else
1306                         regmask &= ~ (1 << ins->sreg1);
1307                 DEBUG (g_print ("excluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
1308         }
1309         if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
1310                 if (ins->sreg2 >= MONO_MAX_IREGS)
1311                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
1312                 else
1313                         regmask &= ~ (1 << ins->sreg2);
1314                 DEBUG (g_print ("excluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
1315         }
1316         if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
1317                 regmask &= ~ (1 << ins->dreg);
1318                 DEBUG (g_print ("excluding dreg %s\n", mono_arch_regname (ins->dreg)));
1319         }
1320
1321         DEBUG (g_print ("available regmask: 0x%08x\n", regmask));
1322         g_assert (regmask); /* need at least a register we can free */
1323         sel = -1;
1324         /* we should track prev_use and spill the register that's farther */
1325         for (i = 0; i < MONO_MAX_IREGS; ++i) {
1326                 if (regmask & (1 << i)) {
1327                         sel = i;
1328                         DEBUG (g_print ("selected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
1329                         break;
1330                 }
1331         }
1332         i = cfg->rs->isymbolic [sel];
1333         spill = ++cfg->spill_count;
1334         cfg->rs->iassign [i] = -spill - 1;
1335         mono_regstate_free_int (cfg->rs, sel);
1336         /* we need to create a spill var and insert a load to sel after the current instruction */
1337         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1338         load->dreg = sel;
1339         load->inst_basereg = X86_EBP;
1340         load->inst_offset = mono_spillvar_offset (cfg, spill);
1341         if (item->prev) {
1342                 while (ins->next != item->prev->data)
1343                         ins = ins->next;
1344         }
1345         load->next = ins->next;
1346         ins->next = load;
1347         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1348         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1349         g_assert (i == sel);
1350         
1351         return sel;
1352 }
1353
1354 static MonoInst*
1355 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1356 {
1357         MonoInst *copy;
1358         MONO_INST_NEW (cfg, copy, OP_MOVE);
1359         copy->dreg = dest;
1360         copy->sreg1 = src;
1361         if (ins) {
1362                 copy->next = ins->next;
1363                 ins->next = copy;
1364         }
1365         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1366         return copy;
1367 }
1368
1369 static MonoInst*
1370 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1371 {
1372         MonoInst *store;
1373         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1374         store->sreg1 = reg;
1375         store->inst_destbasereg = X86_EBP;
1376         store->inst_offset = mono_spillvar_offset (cfg, spill);
1377         if (ins) {
1378                 store->next = ins->next;
1379                 ins->next = store;
1380         }
1381         DEBUG (g_print ("SPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1382         return store;
1383 }
1384
1385 static void
1386 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1387 {
1388         MonoInst *prev;
1389         if (item->next) {
1390                 prev = item->next->data;
1391
1392                 while (prev->next != ins)
1393                         prev = prev->next;
1394                 to_insert->next = ins;
1395                 prev->next = to_insert;
1396         } else {
1397                 to_insert->next = ins;
1398         }
1399         /* 
1400          * needed otherwise in the next instruction we can add an ins to the 
1401          * end and that would get past this instruction.
1402          */
1403         item->data = to_insert; 
1404 }
1405
/*
 * alloc_int_reg (compiled out by the "#if 0" below):
 * Returns the hard register assigned to the symbolic register sym_reg.
 * If sym_reg has no hard register yet (negative assignment) one is
 * allocated from allow_mask, falling back to get_register_spilling
 * when none is free. An assignment below -1 encodes a spill slot
 * (slot = -val - 1); in that case a store of the new register to the
 * slot is created after ins.
 */
1406 #if  0
1407 static int
1408 alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
1409 {
1410         int val = cfg->rs->iassign [sym_reg];
1411         if (val < 0) {
1412                 int spill = 0;
1413                 if (val < -1) {
1414                         /* the register gets spilled after this inst */
1415                         spill = -val -1;
1416                 }
1417                 val = mono_regstate_alloc_int (cfg->rs, allow_mask);
1418                 if (val < 0)
1419                         val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
1420                 cfg->rs->iassign [sym_reg] = val;
1421                 /* add option to store before the instruction for src registers */
1422                 if (spill)
1423                         create_spilled_store (cfg, spill, val, sym_reg, ins);
1424         }
1425         cfg->rs->isymbolic [val] = sym_reg;
1426         return val;
1427 }
1428 #endif
1429
1430 /*#include "cprop.c"*/
1431
1432 /*
1433  * Local register allocation.
1434  * We first scan the list of instructions and we save the liveness info of
1435  * each register (when the register is first used, when it's value is set etc.).
1436  * We also reverse the list of instructions (in the InstList list) because assigning
1437  * registers backwards allows for more tricks to be used.
1438  */
1439 void
1440 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1441 {
1442         MonoInst *ins;
1443         MonoRegState *rs = cfg->rs;
1444         int i, val, fpcount;
1445         RegTrack *reginfo, *reginfof;
1446         RegTrack *reginfo1, *reginfo2, *reginfod;
1447         InstList *tmp, *reversed = NULL;
1448         const char *spec;
1449         guint32 src1_mask, src2_mask, dest_mask;
1450
1451         if (!bb->code)
1452                 return;
1453         rs->next_vireg = bb->max_ireg;
1454         rs->next_vfreg = bb->max_freg;
1455         mono_regstate_assign (rs);
1456         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1457         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1458         rs->ifree_mask = X86_CALLEE_REGS;
1459
1460         ins = bb->code;
1461
1462         /*if (cfg->opt & MONO_OPT_COPYPROP)
1463                 local_copy_prop (cfg, ins);*/
1464         
1465         i = 1;
1466         fpcount = 0; /* FIXME: track fp stack utilization */
1467         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1468         /* forward pass on the instructions to collect register liveness info */
1469         while (ins) {
1470                 spec = ins_spec [ins->opcode];
1471                 DEBUG (print_ins (i, ins));
1472                 if (spec [MONO_INST_SRC1]) {
1473                         if (spec [MONO_INST_SRC1] == 'f')
1474                                 reginfo1 = reginfof;
1475                         else
1476                                 reginfo1 = reginfo;
1477                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1478                         reginfo1 [ins->sreg1].last_use = i;
1479                 } else {
1480                         ins->sreg1 = -1;
1481                 }
1482                 if (spec [MONO_INST_SRC2]) {
1483                         if (spec [MONO_INST_SRC2] == 'f')
1484                                 reginfo2 = reginfof;
1485                         else
1486                                 reginfo2 = reginfo;
1487                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1488                         reginfo2 [ins->sreg2].last_use = i;
1489                 } else {
1490                         ins->sreg2 = -1;
1491                 }
1492                 if (spec [MONO_INST_DEST]) {
1493                         if (spec [MONO_INST_DEST] == 'f')
1494                                 reginfod = reginfof;
1495                         else
1496                                 reginfod = reginfo;
1497                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1498                                 reginfod [ins->dreg].killed_in = i;
1499                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1500                         reginfod [ins->dreg].last_use = i;
1501                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1502                                 reginfod [ins->dreg].born_in = i;
1503                         if (spec [MONO_INST_DEST] == 'l') {
1504                                 /* result in eax:edx, the virtual register is allocated sequentially */
1505                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1506                                 reginfod [ins->dreg + 1].last_use = i;
1507                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1508                                         reginfod [ins->dreg + 1].born_in = i;
1509                         }
1510                 } else {
1511                         ins->dreg = -1;
1512                 }
1513                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
1514                 ++i;
1515                 ins = ins->next;
1516         }
1517
1518         DEBUG (print_regtrack (reginfo, rs->next_vireg));
1519         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
1520         tmp = reversed;
1521         while (tmp) {
1522                 int prev_dreg, prev_sreg1, prev_sreg2;
1523                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
1524                 --i;
1525                 ins = tmp->data;
1526                 spec = ins_spec [ins->opcode];
1527                 prev_dreg = -1;
1528                 DEBUG (g_print ("processing:"));
1529                 DEBUG (print_ins (i, ins));
1530                 if (spec [MONO_INST_CLOB] == 's') {
1531                         if (rs->ifree_mask & (1 << X86_ECX)) {
1532                                 DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
1533                                 rs->iassign [ins->sreg2] = X86_ECX;
1534                                 rs->isymbolic [X86_ECX] = ins->sreg2;
1535                                 ins->sreg2 = X86_ECX;
1536                                 rs->ifree_mask &= ~ (1 << X86_ECX);
1537                         } else {
1538                                 int need_ecx_spill = TRUE;
1539                                 /* 
1540                                  * we first check if src1/dreg is already assigned a register
1541                                  * and then we force a spill of the var assigned to ECX.
1542                                  */
1543                                 /* the destination register can't be ECX */
1544                                 dest_mask &= ~ (1 << X86_ECX);
1545                                 src1_mask &= ~ (1 << X86_ECX);
1546                                 val = rs->iassign [ins->dreg];
1547                                 /* 
1548                                  * the destination register is already assigned to ECX:
1549                                  * we need to allocate another register for it and then
1550                                  * copy from this to ECX.
1551                                  */
1552                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
1553                                         int new_dest = mono_regstate_alloc_int (rs, dest_mask);
1554                                         if (new_dest < 0)
1555                                                 new_dest = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1556                                         g_assert (new_dest >= 0);
1557                                         ins->dreg = new_dest;
1558                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
1559                                         need_ecx_spill = FALSE;
1560                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
1561                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
1562                                         rs->iassign [ins->dreg] = val;
1563                                         rs->isymbolic [val] = prev_dreg;
1564                                         ins->dreg = val;*/
1565                                 }
1566                                 val = rs->iassign [ins->sreg1];
1567                                 if (val == X86_ECX) {
1568                                         g_assert_not_reached ();
1569                                 } else if (val >= 0) {
1570                                         /* 
1571                                          * the first src reg was already assigned to a register,
1572                                          * we need to copy it to the dest register because the 
1573                                          * shift instruction clobbers the first operand.
1574                                          */
1575                                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, val, NULL);
1576                                         insert_before_ins (ins, tmp, copy);
1577                                 }
1578                                 val = rs->iassign [ins->sreg2];
1579                                 if (val >= 0 && val != X86_ECX) {
1580                                         MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
1581                                         DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
1582                                         move->next = ins;
1583                                         g_assert_not_reached ();
1584                                         /* FIXME: where is move connected to the instruction list? */
1585                                         //tmp->prev->data->next = move;
1586                                 }
1587                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
1588                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
1589                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
1590                                         mono_regstate_free_int (rs, X86_ECX);
1591                                 }
1592                                 /* force-set sreg2 */
1593                                 rs->iassign [ins->sreg2] = X86_ECX;
1594                                 rs->isymbolic [X86_ECX] = ins->sreg2;
1595                                 ins->sreg2 = X86_ECX;
1596                                 rs->ifree_mask &= ~ (1 << X86_ECX);
1597                         }
1598                 } else if (spec [MONO_INST_CLOB] == 'd') { /* division */
1599                         int dest_reg = X86_EAX;
1600                         int clob_reg = X86_EDX;
1601                         if (spec [MONO_INST_DEST] == 'd') {
1602                                 dest_reg = X86_EDX; /* reminder */
1603                                 clob_reg = X86_EAX;
1604                         }
1605                         val = rs->iassign [ins->dreg];
1606                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
1607                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
1608                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
1609                                 mono_regstate_free_int (rs, dest_reg);
1610                         }
1611                         if (val < 0) {
1612                                 if (val < -1) {
1613                                         /* the register gets spilled after this inst */
1614                                         int spill = -val -1;
1615                                         dest_mask = 1 << clob_reg;
1616                                         prev_dreg = ins->dreg;
1617                                         val = mono_regstate_alloc_int (rs, dest_mask);
1618                                         if (val < 0)
1619                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1620                                         rs->iassign [ins->dreg] = val;
1621                                         if (spill)
1622                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
1623                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
1624                                         rs->isymbolic [val] = prev_dreg;
1625                                         ins->dreg = val;
1626                                         if (val != dest_reg) { /* force a copy */
1627                                                 create_copy_ins (cfg, val, dest_reg, ins);
1628                                         }
1629                                 } else {
1630                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
1631                                         prev_dreg = ins->dreg;
1632                                         rs->iassign [ins->dreg] = dest_reg;
1633                                         rs->isymbolic [dest_reg] = ins->dreg;
1634                                         ins->dreg = dest_reg;
1635                                         rs->ifree_mask &= ~ (1 << dest_reg);
1636                                 }
1637                         } else {
1638                                 //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
1639                                 if (val != dest_reg) { /* force a copy */
1640                                         create_copy_ins (cfg, val, dest_reg, ins);
1641                                         if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
1642                                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
1643                                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
1644                                                 mono_regstate_free_int (rs, dest_reg);
1645                                         }
1646                                 }
1647                         }
1648                         src1_mask = 1 << X86_EAX;
1649                         src2_mask = 1 << X86_ECX;
1650                 }
1651                 if (spec [MONO_INST_DEST] == 'l') {
1652                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
1653                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
1654                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
1655                                 mono_regstate_free_int (rs, X86_EAX);
1656                         }
1657                         if (!(rs->ifree_mask & (1 << X86_EDX))) {
1658                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EDX]));
1659                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
1660                                 mono_regstate_free_int (rs, X86_EDX);
1661                         }
1662                 }
1663
1664                 /* update for use with FP regs... */
1665                 if (spec [MONO_INST_DEST] != 'f' && ins->dreg >= MONO_MAX_IREGS) {
1666                         val = rs->iassign [ins->dreg];
1667                         prev_dreg = ins->dreg;
1668                         if (val < 0) {
1669                                 int spill = 0;
1670                                 if (val < -1) {
1671                                         /* the register gets spilled after this inst */
1672                                         spill = -val -1;
1673                                 }
1674                                 val = mono_regstate_alloc_int (rs, dest_mask);
1675                                 if (val < 0)
1676                                         val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1677                                 rs->iassign [ins->dreg] = val;
1678                                 if (spill)
1679                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
1680                         }
1681                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
1682                         rs->isymbolic [val] = prev_dreg;
1683                         ins->dreg = val;
1684                         if (spec [MONO_INST_DEST] == 'l') {
1685                                 int hreg = prev_dreg + 1;
1686                                 val = rs->iassign [hreg];
1687                                 if (val < 0) {
1688                                         int spill = 0;
1689                                         if (val < -1) {
1690                                                 /* the register gets spilled after this inst */
1691                                                 spill = -val -1;
1692                                         }
1693                                         val = mono_regstate_alloc_int (rs, dest_mask);
1694                                         if (val < 0)
1695                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, hreg);
1696                                         rs->iassign [hreg] = val;
1697                                         if (spill)
1698                                                 create_spilled_store (cfg, spill, val, hreg, ins);
1699                                 }
1700                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
1701                                 rs->isymbolic [val] = hreg;
1702                                 /* FIXME:? ins->dreg = val; */
1703                                 if (ins->dreg == X86_EAX) {
1704                                         if (val != X86_EDX)
1705                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1706                                 } else if (ins->dreg == X86_EDX) {
1707                                         if (val == X86_EAX) {
1708                                                 /* swap */
1709                                                 g_assert_not_reached ();
1710                                         } else {
1711                                                 /* two forced copies */
1712                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1713                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1714                                         }
1715                                 } else {
1716                                         if (val == X86_EDX) {
1717                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1718                                         } else {
1719                                                 /* two forced copies */
1720                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1721                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1722                                         }
1723                                 }
1724                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
1725                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
1726                                         mono_regstate_free_int (rs, val);
1727                                 }
1728                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
1729                                 /* this instruction only outputs to EAX, need to copy */
1730                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1731                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
1732                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
1733                         }
1734                 }
1735                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
1736                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
1737                         mono_regstate_free_int (rs, ins->dreg);
1738                 }
1739                 /* put src1 in EAX if it needs to be */
1740                 if (spec [MONO_INST_SRC1] == 'a') {
1741                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
1742                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
1743                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
1744                                 mono_regstate_free_int (rs, X86_EAX);
1745                         }
1746                         /* force-set sreg1 */
1747                         rs->iassign [ins->sreg1] = X86_EAX;
1748                         rs->isymbolic [X86_EAX] = ins->sreg1;
1749                         ins->sreg1 = X86_EAX;
1750                         rs->ifree_mask &= ~ (1 << X86_EAX);
1751                 }
1752                 if (spec [MONO_INST_SRC1] != 'f' && ins->sreg1 >= MONO_MAX_IREGS) {
1753                         val = rs->iassign [ins->sreg1];
1754                         prev_sreg1 = ins->sreg1;
1755                         if (val < 0) {
1756                                 int spill = 0;
1757                                 if (val < -1) {
1758                                         /* the register gets spilled after this inst */
1759                                         spill = -val -1;
1760                                 }
1761                                 if (0 && ins->opcode == OP_MOVE) {
1762                                         /* 
1763                                          * small optimization: the dest register is already allocated
1764                                          * but the src one is not: we can simply assign the same register
1765                                          * here and peephole will get rid of the instruction later.
1766                                          * This optimization may interfere with the clobbering handling:
1767                                          * it removes a mov operation that will be added again to handle clobbering.
1768                                          * There are also some other issues that should with make testjit.
1769                                          */
1770                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
1771                                         val = rs->iassign [ins->sreg1] = ins->dreg;
1772                                         //g_assert (val >= 0);
1773                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
1774                                 } else {
1775                                         //g_assert (val == -1); /* source cannot be spilled */
1776                                         val = mono_regstate_alloc_int (rs, src1_mask);
1777                                         if (val < 0)
1778                                                 val = get_register_spilling (cfg, tmp, ins, src1_mask, ins->sreg1);
1779                                         rs->iassign [ins->sreg1] = val;
1780                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
1781                                 }
1782                                 if (spill) {
1783                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
1784                                         insert_before_ins (ins, tmp, store);
1785                                 }
1786                         }
1787                         rs->isymbolic [val] = prev_sreg1;
1788                         ins->sreg1 = val;
1789                 } else {
1790                         prev_sreg1 = -1;
1791                 }
1792                 /* handle clobbering of sreg1 */
1793                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
1794                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
1795                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
1796                         if (ins->sreg2 == -1 || spec [MONO_INST_CLOB] == 's') {
1797                                 /* note: the copy is inserted before the current instruction! */
1798                                 insert_before_ins (ins, tmp, copy);
1799                                 /* we set sreg1 to dest as well */
1800                                 prev_sreg1 = ins->sreg1 = ins->dreg;
1801                         } else {
1802                                 /* inserted after the operation */
1803                                 copy->next = ins->next;
1804                                 ins->next = copy;
1805                         }
1806                 }
1807                 if (spec [MONO_INST_SRC2] != 'f' && ins->sreg2 >= MONO_MAX_IREGS) {
1808                         val = rs->iassign [ins->sreg2];
1809                         prev_sreg2 = ins->sreg2;
1810                         if (val < 0) {
1811                                 int spill = 0;
1812                                 if (val < -1) {
1813                                         /* the register gets spilled after this inst */
1814                                         spill = -val -1;
1815                                 }
1816                                 val = mono_regstate_alloc_int (rs, src2_mask);
1817                                 if (val < 0)
1818                                         val = get_register_spilling (cfg, tmp, ins, src2_mask, ins->sreg2);
1819                                 rs->iassign [ins->sreg2] = val;
1820                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
1821                                 if (spill)
1822                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
1823                         }
1824                         rs->isymbolic [val] = prev_sreg2;
1825                         ins->sreg2 = val;
1826                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
1827                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
1828                         }
1829                 } else {
1830                         prev_sreg2 = -1;
1831                 }
1832
1833                 if (spec [MONO_INST_CLOB] == 'c') {
1834                         int j, s;
1835                         guint32 clob_mask = X86_CALLEE_REGS;
1836                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
1837                                 s = 1 << j;
1838                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
1839                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
1840                                 }
1841                         }
1842                 }
1843                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
1844                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
1845                         mono_regstate_free_int (rs, ins->sreg1);
1846                 }
1847                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
1848                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
1849                         mono_regstate_free_int (rs, ins->sreg2);
1850                 }*/
1851                 
1852                 //DEBUG (print_ins (i, ins));
1853                 /* this may result from a insert_before call */
1854                 if (!tmp->next)
1855                         bb->code = tmp->data;
1856                 tmp = tmp->next;
1857         }
1858
1859         g_free (reginfo);
1860         g_free (reginfof);
1861 }
1862
1863 static unsigned char*
1864 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1865 {
1866         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1867         x86_fnstcw_membase(code, X86_ESP, 0);
1868         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1869         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1870         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1871         x86_fldcw_membase (code, X86_ESP, 2);
1872         if (size == 8) {
1873                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1874                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1875                 x86_pop_reg (code, dreg);
1876                 /* FIXME: need the high register 
1877                  * x86_pop_reg (code, dreg_high);
1878                  */
1879         } else {
1880                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1881                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1882                 x86_pop_reg (code, dreg);
1883         }
1884         x86_fldcw_membase (code, X86_ESP, 0);
1885         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1886
1887         if (size == 1)
1888                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1889         else if (size == 2)
1890                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1891         return code;
1892 }
1893
1894 static unsigned char*
1895 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1896 {
1897         int sreg = tree->sreg1;
1898 #ifdef PLATFORM_WIN32
1899         guint8* br[5];
1900
1901         /*
1902          * Under Windows:
1903          * If requested stack size is larger than one page,
1904          * perform stack-touch operation
1905          */
1906         /*
1907          * Generate stack probe code.
1908          * Under Windows, it is necessary to allocate one page at a time,
1909          * "touching" stack after each successful sub-allocation. This is
1910          * because of the way stack growth is implemented - there is a
1911          * guard page before the lowest stack page that is currently commited.
1912          * Stack normally grows sequentially so OS traps access to the
1913          * guard page and commits more pages when needed.
1914          */
1915         x86_test_reg_imm (code, sreg, ~0xFFF);
1916         br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1917
1918         br[2] = code; /* loop */
1919         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1920         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1921         x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1922         x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1923         br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1924         x86_patch (br[3], br[2]);
1925         x86_test_reg_reg (code, sreg, sreg);
1926         br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1927         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1928
1929         br[1] = code; x86_jump8 (code, 0);
1930
1931         x86_patch (br[0], code);
1932         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1933         x86_patch (br[1], code);
1934         x86_patch (br[4], code);
1935 #else /* PLATFORM_WIN32 */
1936         x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1937 #endif
1938         if (tree->flags & MONO_INST_INIT) {
1939                 int offset = 0;
1940                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1941                         x86_push_reg (code, X86_EAX);
1942                         offset += 4;
1943                 }
1944                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1945                         x86_push_reg (code, X86_ECX);
1946                         offset += 4;
1947                 }
1948                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1949                         x86_push_reg (code, X86_EDI);
1950                         offset += 4;
1951                 }
1952                 
1953                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1954                 if (sreg != X86_ECX)
1955                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1956                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1957                                 
1958                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1959                 x86_cld (code);
1960                 x86_prefix (code, X86_REP_PREFIX);
1961                 x86_stosl (code);
1962                 
1963                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1964                         x86_pop_reg (code, X86_EDI);
1965                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1966                         x86_pop_reg (code, X86_ECX);
1967                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1968                         x86_pop_reg (code, X86_EAX);
1969         }
1970         return code;
1971 }
1972
/*
 * REAL_PRINT_REG:
 * @text: string literal that is pasted in front of the " %d %p\n" format
 * @reg: register to report
 *
 * Debugging aid: emits native code that calls printf with the number of
 * @reg (as %d) and the contents of @reg (as %p). EAX, EDX and ECX are
 * pushed before the call and popped again afterwards; the three printf
 * arguments are removed from the stack by adjusting ESP.
 *
 * The macro expands to a sequence of statements (it is not a single
 * expression) and uses the variable named code of the enclosing scope as
 * the emission pointer.
 */
#define REAL_PRINT_REG(text,reg) \
	mono_assert (reg >= 0); \
	x86_push_reg (code, X86_EAX); \
	x86_push_reg (code, X86_EDX); \
	x86_push_reg (code, X86_ECX); \
	x86_push_reg (code, reg); \
	x86_push_imm (code, reg); \
	x86_push_imm (code, text " %d %p\n"); \
	x86_mov_reg_imm (code, X86_EAX, printf); \
	x86_call_reg (code, X86_EAX); \
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
	x86_pop_reg (code, X86_ECX); \
	x86_pop_reg (code, X86_EDX); \
	x86_pop_reg (code, X86_EAX);
1987
1988 void
1989 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1990 {
1991         MonoInst *ins;
1992         MonoCallInst *call;
1993         guint offset;
1994         guint8 *code = cfg->native_code + cfg->code_len;
1995         MonoInst *last_ins = NULL;
1996         guint last_offset = 0;
1997         int max_len, cpos;
1998
1999         if (cfg->opt & MONO_OPT_PEEPHOLE)
2000                 peephole_pass (cfg, bb);
2001
2002 #if 0
2003         /* 
2004          * various stratgies to align BBs. Using real loop detection or simply
2005          * aligning every block leads to more consistent benchmark results,
2006          * but usually slows down the code
2007          * we should do the alignment outside this function or we should adjust
2008          * bb->native offset as well or the code is effectively slowed down!
2009          */
2010         /* align all blocks */
2011 //      if ((pad = (cfg->code_len & (align - 1)))) {
2012         /* poor man loop start detection */
2013 //      if (bb->code && bb->in_count && bb->in_bb [0]->cil_code > bb->cil_code && (pad = (cfg->code_len & (align - 1)))) {
2014         /* consider real loop detection and nesting level */
2015 //      if (bb->loop_blocks && bb->nesting < 3 && (pad = (cfg->code_len & (align - 1)))) {
2016         /* consider real loop detection */
2017         if (bb->loop_blocks && (pad = (cfg->code_len & (align - 1)))) {
2018                 pad = align - pad;
2019                 x86_padding (code, pad);
2020                 cfg->code_len += pad;
2021                 bb->native_offset = cfg->code_len;
2022         }
2023 #endif
2024
2025         if (cfg->verbose_level > 2)
2026                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2027
2028         cpos = bb->max_offset;
2029
2030         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2031                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2032                 g_assert (!mono_compile_aot);
2033                 cpos += 6;
2034
2035                 cov->data [bb->dfn].cil_code = bb->cil_code;
2036                 /* this is not thread safe, but good enough */
2037                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2038         }
2039
2040         offset = code - cfg->native_code;
2041
2042         ins = bb->code;
2043         while (ins) {
2044                 offset = code - cfg->native_code;
2045
2046                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2047
2048                 if (offset > (cfg->code_size - max_len - 16)) {
2049                         cfg->code_size *= 2;
2050                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2051                         code = cfg->native_code + offset;
2052                         mono_jit_stats.code_reallocs++;
2053                 }
2054
2055                 mono_debug_record_line_number (cfg, ins, offset);
2056
2057                 switch (ins->opcode) {
2058                 case OP_BIGMUL:
2059                         x86_mul_reg (code, ins->sreg2, TRUE);
2060                         break;
2061                 case OP_BIGMUL_UN:
2062                         x86_mul_reg (code, ins->sreg2, FALSE);
2063                         break;
2064                 case OP_X86_SETEQ_MEMBASE:
2065                         x86_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
2066                         break;
2067                 case OP_STOREI1_MEMBASE_IMM:
2068                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2069                         break;
2070                 case OP_STOREI2_MEMBASE_IMM:
2071                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2072                         break;
2073                 case OP_STORE_MEMBASE_IMM:
2074                 case OP_STOREI4_MEMBASE_IMM:
2075                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2076                         break;
2077                 case OP_STOREI1_MEMBASE_REG:
2078                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2079                         break;
2080                 case OP_STOREI2_MEMBASE_REG:
2081                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2082                         break;
2083                 case OP_STORE_MEMBASE_REG:
2084                 case OP_STOREI4_MEMBASE_REG:
2085                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2086                         break;
2087                 case CEE_LDIND_I:
2088                 case CEE_LDIND_I4:
2089                 case CEE_LDIND_U4:
2090                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2091                         break;
2092                 case OP_LOADU4_MEM:
2093                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2094                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2095                         break;
2096                 case OP_LOAD_MEMBASE:
2097                 case OP_LOADI4_MEMBASE:
2098                 case OP_LOADU4_MEMBASE:
2099                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2100                         break;
2101                 case OP_LOADU1_MEMBASE:
2102                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2103                         break;
2104                 case OP_LOADI1_MEMBASE:
2105                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2106                         break;
2107                 case OP_LOADU2_MEMBASE:
2108                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2109                         break;
2110                 case OP_LOADI2_MEMBASE:
2111                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2112                         break;
2113                 case CEE_CONV_I1:
2114                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2115                         break;
2116                 case CEE_CONV_I2:
2117                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2118                         break;
2119                 case CEE_CONV_U1:
2120                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2121                         break;
2122                 case CEE_CONV_U2:
2123                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2124                         break;
2125                 case OP_COMPARE:
2126                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2127                         break;
2128                 case OP_COMPARE_IMM:
2129                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2130                         break;
2131                 case OP_X86_COMPARE_MEMBASE_REG:
2132                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2133                         break;
2134                 case OP_X86_COMPARE_MEMBASE_IMM:
2135                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2136                         break;
2137                 case OP_X86_COMPARE_REG_MEMBASE:
2138                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2139                         break;
2140                 case OP_X86_TEST_NULL:
2141                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2142                         break;
2143                 case OP_X86_ADD_MEMBASE_IMM:
2144                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2145                         break;
2146                 case OP_X86_SUB_MEMBASE_IMM:
2147                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2148                         break;
2149                 case OP_X86_INC_MEMBASE:
2150                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2151                         break;
2152                 case OP_X86_INC_REG:
2153                         x86_inc_reg (code, ins->dreg);
2154                         break;
2155                 case OP_X86_DEC_MEMBASE:
2156                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2157                         break;
2158                 case OP_X86_DEC_REG:
2159                         x86_dec_reg (code, ins->dreg);
2160                         break;
2161                 case CEE_BREAK:
2162                         x86_breakpoint (code);
2163                         break;
2164                 case OP_ADDCC:
2165                 case CEE_ADD:
2166                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2167                         break;
2168                 case OP_ADC:
2169                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2170                         break;
2171                 case OP_ADD_IMM:
2172                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2173                         break;
2174                 case OP_ADC_IMM:
2175                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2176                         break;
2177                 case OP_SUBCC:
2178                 case CEE_SUB:
2179                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2180                         break;
2181                 case OP_SBB:
2182                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2183                         break;
2184                 case OP_SUB_IMM:
2185                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2186                         break;
2187                 case OP_SBB_IMM:
2188                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2189                         break;
2190                 case CEE_AND:
2191                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2192                         break;
2193                 case OP_AND_IMM:
2194                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2195                         break;
2196                 case CEE_DIV:
2197                         x86_cdq (code);
2198                         x86_div_reg (code, ins->sreg2, TRUE);
2199                         break;
2200                 case CEE_DIV_UN:
2201                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2202                         x86_div_reg (code, ins->sreg2, FALSE);
2203                         break;
2204                 case OP_DIV_IMM:
2205                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2206                         x86_cdq (code);
2207                         x86_div_reg (code, ins->sreg2, TRUE);
2208                         break;
2209                 case CEE_REM:
2210                         x86_cdq (code);
2211                         x86_div_reg (code, ins->sreg2, TRUE);
2212                         break;
2213                 case CEE_REM_UN:
2214                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2215                         x86_div_reg (code, ins->sreg2, FALSE);
2216                         break;
2217                 case OP_REM_IMM:
2218                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2219                         x86_cdq (code);
2220                         x86_div_reg (code, ins->sreg2, TRUE);
2221                         break;
2222                 case CEE_OR:
2223                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2224                         break;
2225                 case OP_OR_IMM:
2226                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2227                         break;
2228                 case CEE_XOR:
2229                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2230                         break;
2231                 case OP_XOR_IMM:
2232                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2233                         break;
2234                 case CEE_SHL:
2235                         g_assert (ins->sreg2 == X86_ECX);
2236                         x86_shift_reg (code, X86_SHL, ins->dreg);
2237                         break;
2238                 case CEE_SHR:
2239                         g_assert (ins->sreg2 == X86_ECX);
2240                         x86_shift_reg (code, X86_SAR, ins->dreg);
2241                         break;
2242                 case OP_SHR_IMM:
2243                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2244                         break;
2245                 case OP_SHR_UN_IMM:
2246                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2247                         break;
2248                 case CEE_SHR_UN:
2249                         g_assert (ins->sreg2 == X86_ECX);
2250                         x86_shift_reg (code, X86_SHR, ins->dreg);
2251                         break;
2252                 case OP_SHL_IMM:
2253                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2254                         break;
2255                 case CEE_NOT:
2256                         x86_not_reg (code, ins->sreg1);
2257                         break;
2258                 case CEE_NEG:
2259                         x86_neg_reg (code, ins->sreg1);
2260                         break;
2261                 case OP_SEXT_I1:
2262                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2263                         break;
2264                 case OP_SEXT_I2:
2265                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2266                         break;
2267                 case CEE_MUL:
2268                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2269                         break;
2270                 case OP_MUL_IMM:
2271                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2272                         break;
2273                 case CEE_MUL_OVF:
2274                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2275                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2276                         break;
2277                 case CEE_MUL_OVF_UN: {
2278                         /* the mul operation and the exception check should most likely be split */
2279                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2280                         /*g_assert (ins->sreg2 == X86_EAX);
2281                         g_assert (ins->dreg == X86_EAX);*/
2282                         if (ins->sreg2 == X86_EAX) {
2283                                 non_eax_reg = ins->sreg1;
2284                         } else if (ins->sreg1 == X86_EAX) {
2285                                 non_eax_reg = ins->sreg2;
2286                         } else {
2287                                 /* no need to save since we're going to store to it anyway */
2288                                 if (ins->dreg != X86_EAX) {
2289                                         saved_eax = TRUE;
2290                                         x86_push_reg (code, X86_EAX);
2291                                 }
2292                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2293                                 non_eax_reg = ins->sreg2;
2294                         }
2295                         if (ins->dreg == X86_EDX) {
2296                                 if (!saved_eax) {
2297                                         saved_eax = TRUE;
2298                                         x86_push_reg (code, X86_EAX);
2299                                 }
2300                         } else if (ins->dreg != X86_EAX) {
2301                                 saved_edx = TRUE;
2302                                 x86_push_reg (code, X86_EDX);
2303                         }
2304                         x86_mul_reg (code, non_eax_reg, FALSE);
2305                         /* save before the check since pop and mov don't change the flags */
2306                         if (saved_edx)
2307                                 x86_pop_reg (code, X86_EDX);
2308                         if (saved_eax)
2309                                 x86_pop_reg (code, X86_EAX);
2310                         if (ins->dreg != X86_EAX)
2311                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2312                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2313                         break;
2314                 }
2315                 case OP_ICONST:
2316                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2317                         break;
2318                 case OP_AOTCONST:
2319                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2320                         x86_mov_reg_imm (code, ins->dreg, 0);
2321                         break;
2322                 case CEE_CONV_I4:
2323                 case CEE_CONV_U4:
2324                 case OP_MOVE:
2325                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2326                         break;
2327                 case CEE_JMP: {
2328                         /*
2329                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2330                          * Keep in sync with the code in emit_epilog.
2331                          */
2332                         int pos = 0;
2333
2334                         /* FIXME: no tracing support... */
2335                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2336                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2337                         /* reset offset to make max_len work */
2338                         offset = code - cfg->native_code;
2339
2340                         g_assert (!cfg->method->save_lmf);
2341
2342                         if (cfg->used_int_regs & (1 << X86_EBX))
2343                                 pos -= 4;
2344                         if (cfg->used_int_regs & (1 << X86_EDI))
2345                                 pos -= 4;
2346                         if (cfg->used_int_regs & (1 << X86_ESI))
2347                                 pos -= 4;
2348                         if (pos)
2349                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2350         
2351                         if (cfg->used_int_regs & (1 << X86_ESI))
2352                                 x86_pop_reg (code, X86_ESI);
2353                         if (cfg->used_int_regs & (1 << X86_EDI))
2354                                 x86_pop_reg (code, X86_EDI);
2355                         if (cfg->used_int_regs & (1 << X86_EBX))
2356                                 x86_pop_reg (code, X86_EBX);
2357         
2358                         /* restore ESP/EBP */
2359                         x86_leave (code);
2360                         offset = code - cfg->native_code;
2361                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2362                         x86_jump32 (code, 0);
2363                         break;
2364                 }
2365                 case OP_CHECK_THIS:
2366                         /* ensure ins->sreg1 is not NULL */
2367                         x86_alu_membase_imm (code, X86_CMP, ins->sreg1, 0, 0);
2368                         break;
2369                 case OP_ARGLIST: {
2370                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2371                         x86_push_reg (code, hreg);
2372                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2373                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2374                         x86_pop_reg (code, hreg);
2375                         break;
2376                 }
2377                 case OP_FCALL:
2378                 case OP_LCALL:
2379                 case OP_VCALL:
2380                 case OP_VOIDCALL:
2381                 case CEE_CALL:
2382                         call = (MonoCallInst*)ins;
2383                         if (ins->flags & MONO_INST_HAS_METHOD)
2384                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method);
2385                         else {
2386                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
2387                         }
2388                         x86_call_code (code, 0);
2389                         if (call->stack_usage && (call->signature->call_convention != MONO_CALL_STDCALL))
2390                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2391                         break;
2392                 case OP_FCALL_REG:
2393                 case OP_LCALL_REG:
2394                 case OP_VCALL_REG:
2395                 case OP_VOIDCALL_REG:
2396                 case OP_CALL_REG:
2397                         call = (MonoCallInst*)ins;
2398                         x86_call_reg (code, ins->sreg1);
2399                         if (call->stack_usage && (call->signature->call_convention != MONO_CALL_STDCALL))
2400                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2401                         break;
2402                 case OP_FCALL_MEMBASE:
2403                 case OP_LCALL_MEMBASE:
2404                 case OP_VCALL_MEMBASE:
2405                 case OP_VOIDCALL_MEMBASE:
2406                 case OP_CALL_MEMBASE:
2407                         call = (MonoCallInst*)ins;
2408                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2409                         if (call->stack_usage && (call->signature->call_convention != MONO_CALL_STDCALL))
2410                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2411                         break;
2412                 case OP_OUTARG:
2413                 case OP_X86_PUSH:
2414                         x86_push_reg (code, ins->sreg1);
2415                         break;
2416                 case OP_X86_PUSH_IMM:
2417                         x86_push_imm (code, ins->inst_imm);
2418                         break;
2419                 case OP_X86_PUSH_MEMBASE:
2420                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2421                         break;
2422                 case OP_X86_PUSH_OBJ: 
2423                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2424                         x86_push_reg (code, X86_EDI);
2425                         x86_push_reg (code, X86_ESI);
2426                         x86_push_reg (code, X86_ECX);
2427                         if (ins->inst_offset)
2428                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2429                         else
2430                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2431                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2432                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2433                         x86_cld (code);
2434                         x86_prefix (code, X86_REP_PREFIX);
2435                         x86_movsd (code);
2436                         x86_pop_reg (code, X86_ECX);
2437                         x86_pop_reg (code, X86_ESI);
2438                         x86_pop_reg (code, X86_EDI);
2439                         break;
2440                 case OP_X86_LEA:
2441                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2442                         break;
2443                 case OP_X86_LEA_MEMBASE:
2444                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2445                         break;
2446                 case OP_X86_XCHG:
2447                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2448                         break;
2449                 case OP_LOCALLOC:
2450                         /* keep alignment */
2451                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2452                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2453                         code = mono_emit_stack_alloc (code, ins);
2454                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2455                         break;
2456                 case CEE_RET:
2457                         x86_ret (code);
2458                         break;
2459                 case CEE_THROW: {
2460                         x86_push_reg (code, ins->sreg1);
2461                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2462                                              (gpointer)"mono_arch_throw_exception");
2463                         x86_call_code (code, 0);
2464                         break;
2465                 }
2466                 case OP_CALL_HANDLER: 
2467                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2468                         x86_call_imm (code, 0);
2469                         break;
2470                 case OP_LABEL:
2471                         ins->inst_c0 = code - cfg->native_code;
2472                         break;
2473                 case CEE_BR:
2474                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2475                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2476                         //break;
2477                         if (ins->flags & MONO_INST_BRLABEL) {
2478                                 if (ins->inst_i0->inst_c0) {
2479                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2480                                 } else {
2481                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2482                                         x86_jump32 (code, 0);
2483                                 }
2484                         } else {
2485                                 if (ins->inst_target_bb->native_offset) {
2486                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2487                                 } else {
2488                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2489                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2490                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2491                                                 x86_jump8 (code, 0);
2492                                         else 
2493                                                 x86_jump32 (code, 0);
2494                                 } 
2495                         }
2496                         break;
2497                 case OP_BR_REG:
2498                         x86_jump_reg (code, ins->sreg1);
2499                         break;
2500                 case OP_CEQ:
2501                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2502                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2503                         break;
2504                 case OP_CLT:
2505                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2506                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2507                         break;
2508                 case OP_CLT_UN:
2509                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2510                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2511                         break;
2512                 case OP_CGT:
2513                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2514                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2515                         break;
2516                 case OP_CGT_UN:
2517                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2518                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2519                         break;
2520                 case OP_COND_EXC_EQ:
2521                 case OP_COND_EXC_NE_UN:
2522                 case OP_COND_EXC_LT:
2523                 case OP_COND_EXC_LT_UN:
2524                 case OP_COND_EXC_GT:
2525                 case OP_COND_EXC_GT_UN:
2526                 case OP_COND_EXC_GE:
2527                 case OP_COND_EXC_GE_UN:
2528                 case OP_COND_EXC_LE:
2529                 case OP_COND_EXC_LE_UN:
2530                 case OP_COND_EXC_OV:
2531                 case OP_COND_EXC_NO:
2532                 case OP_COND_EXC_C:
2533                 case OP_COND_EXC_NC:
2534                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
2535                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2536                         break;
2537                 case CEE_BEQ:
2538                 case CEE_BNE_UN:
2539                 case CEE_BLT:
2540                 case CEE_BLT_UN:
2541                 case CEE_BGT:
2542                 case CEE_BGT_UN:
2543                 case CEE_BGE:
2544                 case CEE_BGE_UN:
2545                 case CEE_BLE:
2546                 case CEE_BLE_UN:
2547                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2548                         break;
2549
2550                 /* floating point opcodes */
2551                 case OP_R8CONST: {
2552                         double d = *(double *)ins->inst_p0;
2553
2554                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2555                                 x86_fldz (code);
2556                         } else if (d == 1.0) {
2557                                 x86_fld1 (code);
2558                         } else {
2559                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
2560                                 x86_fld (code, NULL, TRUE);
2561                         }
2562                         break;
2563                 }
2564                 case OP_R4CONST: {
2565                         float f = *(float *)ins->inst_p0;
2566
2567                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2568                                 x86_fldz (code);
2569                         } else if (f == 1.0) {
2570                                 x86_fld1 (code);
2571                         } else {
2572                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
2573                                 x86_fld (code, NULL, FALSE);
2574                         }
2575                         break;
2576                 }
2577                 case OP_STORER8_MEMBASE_REG:
2578                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2579                         break;
2580                 case OP_LOADR8_MEMBASE:
2581                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2582                         break;
2583                 case OP_STORER4_MEMBASE_REG:
2584                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2585                         break;
2586                 case OP_LOADR4_MEMBASE:
2587                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2588                         break;
2589                 case CEE_CONV_R4: /* FIXME: change precision */
2590                 case CEE_CONV_R8:
2591                         x86_push_reg (code, ins->sreg1);
2592                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2593                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2594                         break;
2595                 case OP_X86_FP_LOAD_I8:
2596                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2597                         break;
2598                 case OP_X86_FP_LOAD_I4:
2599                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2600                         break;
2601                 case OP_FCONV_TO_I1:
2602                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2603                         break;
2604                 case OP_FCONV_TO_U1:
2605                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2606                         break;
2607                 case OP_FCONV_TO_I2:
2608                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2609                         break;
2610                 case OP_FCONV_TO_U2:
2611                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2612                         break;
2613                 case OP_FCONV_TO_I4:
2614                 case OP_FCONV_TO_I:
2615                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2616                         break;
2617                 case OP_FCONV_TO_I8:
2618                         /* we defined this instruction to output only to eax:edx */
2619                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2620                         x86_fnstcw_membase(code, X86_ESP, 0);
2621                         x86_mov_reg_membase (code, X86_EAX, X86_ESP, 0, 2);
2622                         x86_alu_reg_imm (code, X86_OR, X86_EAX, 0xc00);
2623                         x86_mov_membase_reg (code, X86_ESP, 2, X86_EAX, 2);
2624                         x86_fldcw_membase (code, X86_ESP, 2);
2625                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2626                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2627                         x86_pop_reg (code, X86_EAX);
2628                         x86_pop_reg (code, X86_EDX);
2629                         x86_fldcw_membase (code, X86_ESP, 0);
2630                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2631                         break;
2632                 case OP_LCONV_TO_R_UN: { 
2633                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2634                         guint8 *br;
2635
2636                         /* load 64bit integer to FP stack */
2637                         x86_push_imm (code, 0);
2638                         x86_push_reg (code, ins->sreg2);
2639                         x86_push_reg (code, ins->sreg1);
2640                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2641                         /* store as 80bit FP value */
2642                         x86_fst80_membase (code, X86_ESP, 0);
2643                         
2644                         /* test if lreg is negative */
2645                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2646                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2647         
2648                         /* add correction constant mn */
2649                         x86_fld80_mem (code, mn);
2650                         x86_fld80_membase (code, X86_ESP, 0);
2651                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2652                         x86_fst80_membase (code, X86_ESP, 0);
2653
2654                         x86_patch (br, code);
2655
2656                         x86_fld80_membase (code, X86_ESP, 0);
2657                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2658
2659                         break;
2660                 }
2661                 case OP_LCONV_TO_OVF_I: {
2662                         guint8 *br [3], *label [1];
2663
2664                         /* 
2665                          * Valid ints: 0xffffffff:80000000 to 0x00000000:7fffffff
2666                          */
2667                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2668
2669                         /* If the low word top bit is set, see if we are negative */
2670                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2671                         /* We are not negative (no top bit set, check for our top word to be zero */
2672                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2673                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2674                         label [0] = code;
2675
2676                         /* throw exception */
2677                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2678                         x86_jump32 (code, 0);
2679         
2680                         x86_patch (br [0], code);
2681                         /* our top bit is set, check that top word is 0xffffffff */
2682                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2683                 
2684                         x86_patch (br [1], code);
2685                         /* nope, emit exception */
2686                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2687                         x86_patch (br [2], label [0]);
2688
2689                         if (ins->dreg != ins->sreg1)
2690                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2691                         break;
2692                 }
2693                 case OP_FADD:
2694                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2695                         break;
2696                 case OP_FSUB:
2697                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2698                         break;          
2699                 case OP_FMUL:
2700                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2701                         break;          
2702                 case OP_FDIV:
2703                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2704                         break;          
2705                 case OP_FNEG:
2706                         x86_fchs (code);
2707                         break;          
2708                 case OP_SIN:
2709                         x86_fsin (code);
2710                         break;          
2711                 case OP_COS:
2712                         x86_fcos (code);
2713                         break;          
2714                 case OP_ABS:
2715                         x86_fabs (code);
2716                         break;          
2717                 case OP_TAN: {
2718                         /* 
2719                          * it really doesn't make sense to inline all this code,
2720                          * it's here just to show that things may not be as simple 
2721                          * as they appear.
2722                          */
2723                         guchar *check_pos, *end_tan, *pop_jump;
2724                         x86_push_reg (code, X86_EAX);
2725                         x86_fptan (code);
2726                         x86_fnstsw (code);
2727                         x86_test_reg_imm (code, X86_EAX, 0x400);
2728                         check_pos = code;
2729                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2730                         x86_fstp (code, 0); /* pop the 1.0 */
2731                         end_tan = code;
2732                         x86_jump8 (code, 0);
2733                         x86_fldpi (code);
2734                         x86_fp_op (code, X86_FADD, 0);
2735                         x86_fxch (code, 1);
2736                         x86_fprem1 (code);
2737                         x86_fstsw (code);
2738                         x86_test_reg_imm (code, X86_EAX, 0x400);
2739                         pop_jump = code;
2740                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2741                         x86_fstp (code, 1);
2742                         x86_fptan (code);
2743                         x86_patch (pop_jump, code);
2744                         x86_fstp (code, 0); /* pop the 1.0 */
2745                         x86_patch (check_pos, code);
2746                         x86_patch (end_tan, code);
2747                         x86_pop_reg (code, X86_EAX);
2748                         break;
2749                 }
2750                 case OP_ATAN:
2751                         x86_fld1 (code);
2752                         x86_fpatan (code);
2753                         break;          
2754                 case OP_SQRT:
2755                         x86_fsqrt (code);
2756                         break;          
2757                 case OP_X86_FPOP:
2758                         x86_fstp (code, 0);
2759                         break;          
2760                 case OP_FREM: {
2761                         guint8 *l1, *l2;
2762
2763                         x86_push_reg (code, X86_EAX);
2764                         /* we need to exchange ST(0) with ST(1) */
2765                         x86_fxch (code, 1);
2766
2767                         /* this requires a loop, because fprem sometimes
2768                          * returns a partial remainder */
2769                         l1 = code;
2770                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2771                         /* x86_fprem1 (code); */
2772                         x86_fprem (code);
2773                         x86_fnstsw (code);
2774                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x0400);
2775                         l2 = code + 2;
2776                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2777
2778                         /* pop result */
2779                         x86_fstp (code, 1);
2780
2781                         x86_pop_reg (code, X86_EAX);
2782                         break;
2783                 }
2784                 case OP_FCOMPARE:
2785                         if (cfg->opt & MONO_OPT_FCMOV) {
2786                                 x86_fcomip (code, 1);
2787                                 x86_fstp (code, 0);
2788                                 break;
2789                         }
2790                         /* this overwrites EAX */
2791                         EMIT_FPCOMPARE(code);
2792                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2793                         break;
2794                 case OP_FCEQ:
2795                         if (cfg->opt & MONO_OPT_FCMOV) {
2796                                 /* zeroing the register at the start results in 
2797                                  * shorter and faster code (we can also remove the widening op)
2798                                  */
2799                                 guchar *unordered_check;
2800                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2801                                 x86_fcomip (code, 1);
2802                                 x86_fstp (code, 0);
2803                                 unordered_check = code;
2804                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2805                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2806                                 x86_patch (unordered_check, code);
2807                                 break;
2808                         }
2809                         if (ins->dreg != X86_EAX) 
2810                                 x86_push_reg (code, X86_EAX);
2811
2812                         EMIT_FPCOMPARE(code);
2813                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2814                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2815                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2816                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2817
2818                         if (ins->dreg != X86_EAX) 
2819                                 x86_pop_reg (code, X86_EAX);
2820                         break;
2821                 case OP_FCLT:
2822                 case OP_FCLT_UN:
2823                         if (cfg->opt & MONO_OPT_FCMOV) {
2824                                 /* zeroing the register at the start results in 
2825                                  * shorter and faster code (we can also remove the widening op)
2826                                  */
2827                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2828                                 x86_fcomip (code, 1);
2829                                 x86_fstp (code, 0);
2830                                 if (ins->opcode == OP_FCLT_UN) {
2831                                         guchar *unordered_check = code;
2832                                         guchar *jump_to_end;
2833                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2834                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2835                                         jump_to_end = code;
2836                                         x86_jump8 (code, 0);
2837                                         x86_patch (unordered_check, code);
2838                                         x86_inc_reg (code, ins->dreg);
2839                                         x86_patch (jump_to_end, code);
2840                                 } else {
2841                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2842                                 }
2843                                 break;
2844                         }
2845                         if (ins->dreg != X86_EAX) 
2846                                 x86_push_reg (code, X86_EAX);
2847
2848                         EMIT_FPCOMPARE(code);
2849                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2850                         if (ins->opcode == OP_FCLT_UN) {
2851                                 guchar *is_not_zero_check, *end_jump;
2852                                 is_not_zero_check = code;
2853                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2854                                 end_jump = code;
2855                                 x86_jump8 (code, 0);
2856                                 x86_patch (is_not_zero_check, code);
2857                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2858
2859                                 x86_patch (end_jump, code);
2860                         }
2861                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2862                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2863
2864                         if (ins->dreg != X86_EAX) 
2865                                 x86_pop_reg (code, X86_EAX);
2866                         break;
2867                 case OP_FCGT:
2868                 case OP_FCGT_UN:
2869                         if (cfg->opt & MONO_OPT_FCMOV) {
2870                                 /* zeroing the register at the start results in 
2871                                  * shorter and faster code (we can also remove the widening op)
2872                                  */
2873                                 guchar *unordered_check;
2874                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2875                                 x86_fcomip (code, 1);
2876                                 x86_fstp (code, 0);
2877                                 if (ins->opcode == OP_FCGT) {
2878                                         unordered_check = code;
2879                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2880                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2881                                         x86_patch (unordered_check, code);
2882                                 } else {
2883                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2884                                 }
2885                                 break;
2886                         }
2887                         if (ins->dreg != X86_EAX) 
2888                                 x86_push_reg (code, X86_EAX);
2889
2890                         EMIT_FPCOMPARE(code);
2891                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2892                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2893                         if (ins->opcode == OP_FCGT_UN) {
2894                                 guchar *is_not_zero_check, *end_jump;
2895                                 is_not_zero_check = code;
2896                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2897                                 end_jump = code;
2898                                 x86_jump8 (code, 0);
2899                                 x86_patch (is_not_zero_check, code);
2900                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2901
2902                                 x86_patch (end_jump, code);
2903                         }
2904                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2905                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2906
2907                         if (ins->dreg != X86_EAX) 
2908                                 x86_pop_reg (code, X86_EAX);
2909                         break;
2910                 case OP_FBEQ:
2911                         if (cfg->opt & MONO_OPT_FCMOV) {
2912                                 guchar *jump = code;
2913                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2914                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2915                                 x86_patch (jump, code);
2916                                 break;
2917                         }
2918                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2919                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2920                         break;
2921                 case OP_FBNE_UN:
2922                         if (cfg->opt & MONO_OPT_FCMOV) {
2923                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2924                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2925                                 break;
2926                         }
2927                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2928                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2929                         break;
2930                 case OP_FBLT:
2931                         if (cfg->opt & MONO_OPT_FCMOV) {
2932                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2933                                 break;
2934                         }
2935                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2936                         break;
2937                 case OP_FBLT_UN:
2938                         if (cfg->opt & MONO_OPT_FCMOV) {
2939                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2940                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2941                                 break;
2942                         }
2943                         if (ins->opcode == OP_FBLT_UN) {
2944                                 guchar *is_not_zero_check, *end_jump;
2945                                 is_not_zero_check = code;
2946                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2947                                 end_jump = code;
2948                                 x86_jump8 (code, 0);
2949                                 x86_patch (is_not_zero_check, code);
2950                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2951
2952                                 x86_patch (end_jump, code);
2953                         }
2954                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2955                         break;
2956                 case OP_FBGT:
2957                 case OP_FBGT_UN:
2958                         if (cfg->opt & MONO_OPT_FCMOV) {
2959                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2960                                 break;
2961                         }
2962                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2963                         if (ins->opcode == OP_FBGT_UN) {
2964                                 guchar *is_not_zero_check, *end_jump;
2965                                 is_not_zero_check = code;
2966                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2967                                 end_jump = code;
2968                                 x86_jump8 (code, 0);
2969                                 x86_patch (is_not_zero_check, code);
2970                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2971
2972                                 x86_patch (end_jump, code);
2973                         }
2974                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2975                         break;
2976                 case OP_FBGE:
2977                 case OP_FBGE_UN:
2978                         if (cfg->opt & MONO_OPT_FCMOV) {
2979                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
2980                                 break;
2981                         }
2982                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2983                         break;
2984                 case OP_FBLE:
2985                 case OP_FBLE_UN:
2986                         if (cfg->opt & MONO_OPT_FCMOV) {
2987                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2988                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
2989                                 break;
2990                         }
2991                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2992                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2993                         break;
2994                 case CEE_CKFINITE: {
2995                         x86_push_reg (code, X86_EAX);
2996                         x86_fxam (code);
2997                         x86_fnstsw (code);
2998                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
2999                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
3000                         x86_pop_reg (code, X86_EAX);
3001                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3002                         break;
3003                 }
3004                 default:
3005                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3006                         g_assert_not_reached ();
3007                 }
3008
3009                 if ((code - cfg->native_code - offset) > max_len) {
3010                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3011                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3012                         g_assert_not_reached ();
3013                 }
3014                
3015                 cpos += max_len;
3016
3017                 last_ins = ins;
3018                 last_offset = offset;
3019                 
3020                 ins = ins->next;
3021         }
3022
3023         cfg->code_len = code - cfg->native_code;
3024 }
3025
3026 void
3027 mono_arch_register_lowlevel_calls (void)
3028 {
3029         mono_register_jit_icall (enter_method, "mono_enter_method", NULL, TRUE);
3030         mono_register_jit_icall (leave_method, "mono_leave_method", NULL, TRUE);
3031 }
3032
3033 void
3034 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3035 {
3036         MonoJumpInfo *patch_info;
3037
3038         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3039                 unsigned char *ip = patch_info->ip.i + code;
3040                 const unsigned char *target = NULL;
3041
3042                 switch (patch_info->type) {
3043                 case MONO_PATCH_INFO_BB:
3044                         target = patch_info->data.bb->native_offset + code;
3045                         break;
3046                 case MONO_PATCH_INFO_ABS:
3047                         target = patch_info->data.target;
3048                         break;
3049                 case MONO_PATCH_INFO_LABEL:
3050                         target = patch_info->data.inst->inst_c0 + code;
3051                         break;
3052                 case MONO_PATCH_INFO_IP:
3053                         *((gpointer *)(ip)) = ip;
3054                         continue;
3055                 case MONO_PATCH_INFO_METHOD_REL:
3056                         *((gpointer *)(ip)) = code + patch_info->data.offset;
3057                         continue;
3058                 case MONO_PATCH_INFO_INTERNAL_METHOD: {
3059                         MonoJitICallInfo *mi = mono_find_jit_icall_by_name (patch_info->data.name);
3060                         if (!mi) {
3061                                 g_warning ("unknown MONO_PATCH_INFO_INTERNAL_METHOD %s", patch_info->data.name);
3062                                 g_assert_not_reached ();
3063                         }
3064                         target = mono_icall_get_wrapper (mi);
3065                         break;
3066                 }
3067                 case MONO_PATCH_INFO_METHOD_JUMP: {
3068                         GSList *list;
3069
3070                         /* get the trampoline to the method from the domain */
3071                         target = mono_arch_create_jump_trampoline (patch_info->data.method);
3072                         if (!domain->jump_target_hash)
3073                                 domain->jump_target_hash = g_hash_table_new (NULL, NULL);
3074                         list = g_hash_table_lookup (domain->jump_target_hash, patch_info->data.method);
3075                         list = g_slist_prepend (list, ip);
3076                         g_hash_table_insert (domain->jump_target_hash, patch_info->data.method, list);
3077                         break;
3078                 }
3079                 case MONO_PATCH_INFO_METHOD:
3080                         if (patch_info->data.method == method) {
3081                                 target = code;
3082                         } else
3083                                 /* get the trampoline to the method from the domain */
3084                                 target = mono_arch_create_jit_trampoline (patch_info->data.method);
3085                         break;
3086                 case MONO_PATCH_INFO_SWITCH: {
3087                         gpointer *jump_table = mono_mempool_alloc (domain->code_mp, sizeof (gpointer) * patch_info->table_size);
3088                         int i;
3089
3090                         *((gconstpointer *)(ip + 2)) = jump_table;
3091
3092                         for (i = 0; i < patch_info->table_size; i++) {
3093                                 jump_table [i] = code + (int)patch_info->data.table [i];
3094                         }
3095                         /* we put into the table the absolute address, no need for x86_patch in this case */
3096                         continue;
3097                 }
3098                 case MONO_PATCH_INFO_METHODCONST:
3099                 case MONO_PATCH_INFO_CLASS:
3100                 case MONO_PATCH_INFO_IMAGE:
3101                 case MONO_PATCH_INFO_FIELD:
3102                         *((gconstpointer *)(ip + 1)) = patch_info->data.target;
3103                         continue;
3104                 case MONO_PATCH_INFO_IID:
3105                         mono_class_init (patch_info->data.klass);
3106                         *((guint32 *)(ip + 1)) = patch_info->data.klass->interface_id;
3107                         continue;                       
3108                 case MONO_PATCH_INFO_VTABLE:
3109                         *((gconstpointer *)(ip + 1)) = mono_class_vtable (domain, patch_info->data.klass);
3110                         continue;
3111                 case MONO_PATCH_INFO_CLASS_INIT: {
3112                         guint8 *code = ip;
3113                         /* Might already been changed to a nop */
3114                         x86_call_imm (code, 0);
3115                         target = mono_create_class_init_trampoline (mono_class_vtable (domain, patch_info->data.klass));
3116                         break;
3117                 }
3118                 case MONO_PATCH_INFO_SFLDA: {
3119                         MonoVTable *vtable = mono_class_vtable (domain, patch_info->data.field->parent);
3120                         if (!vtable->initialized && !(vtable->klass->flags & TYPE_ATTRIBUTE_BEFORE_FIELD_INIT) && mono_class_needs_cctor_run (vtable->klass, method))
3121                                 /* Done by the generated code */
3122                                 ;
3123                         else {
3124                                 if (run_cctors)
3125                                         mono_runtime_class_init (vtable);
3126                         }
3127                         *((gconstpointer *)(ip + 1)) = 
3128                                 (char*)vtable->data + patch_info->data.field->offset;
3129                         continue;
3130                 }
3131                 case MONO_PATCH_INFO_R4:
3132                 case MONO_PATCH_INFO_R8:
3133                         *((gconstpointer *)(ip + 2)) = patch_info->data.target;
3134                         continue;
3135                 case MONO_PATCH_INFO_EXC_NAME:
3136                         *((gconstpointer *)(ip + 1)) = patch_info->data.name;
3137                         continue;
3138                 case MONO_PATCH_INFO_LDSTR:
3139                         *((gconstpointer *)(ip + 1)) = 
3140                                 mono_ldstr (domain, patch_info->data.token->image, 
3141                                                         mono_metadata_token_index (patch_info->data.token->token));
3142                         continue;
3143                 case MONO_PATCH_INFO_TYPE_FROM_HANDLE: {
3144                         gpointer handle;
3145                         MonoClass *handle_class;
3146
3147                         handle = mono_ldtoken (patch_info->data.token->image, 
3148                                                                    patch_info->data.token->token, &handle_class);
3149                         mono_class_init (handle_class);
3150                         mono_class_init (mono_class_from_mono_type (handle));
3151
3152                         *((gconstpointer *)(ip + 1)) = 
3153                                 mono_type_get_object (domain, handle);
3154                         continue;
3155                 }
3156                 case MONO_PATCH_INFO_LDTOKEN: {
3157                         gpointer handle;
3158                         MonoClass *handle_class;
3159
3160                         handle = mono_ldtoken (patch_info->data.token->image,
3161                                                                    patch_info->data.token->token, &handle_class);
3162                         mono_class_init (handle_class);
3163
3164                         *((gconstpointer *)(ip + 1)) = handle;
3165                         continue;
3166                 }
3167                 default:
3168                         g_assert_not_reached ();
3169                 }
3170                 x86_patch (ip, target);
3171         }
3172 }
3173
3174 int
3175 mono_arch_max_epilog_size (MonoCompile *cfg)
3176 {
3177         int exc_count = 0, max_epilog_size = 16;
3178         MonoJumpInfo *patch_info;
3179         
3180         if (cfg->method->save_lmf)
3181                 max_epilog_size += 128;
3182         
3183         if (mono_jit_trace_calls != NULL)
3184                 max_epilog_size += 50;
3185
3186         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3187                 max_epilog_size += 50;
3188
3189         /* count the number of exception infos */
3190      
3191         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3192                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3193                         exc_count++;
3194         }
3195
3196         /* 
3197          * make sure we have enough space for exceptions
3198          * 16 is the size of two push_imm instructions and a call
3199          */
3200         max_epilog_size += exc_count*16;
3201
3202         return max_epilog_size;
3203 }
3204
3205 guint8 *
3206 mono_arch_emit_prolog (MonoCompile *cfg)
3207 {
3208         MonoMethod *method = cfg->method;
3209         MonoBasicBlock *bb;
3210         MonoMethodSignature *sig;
3211         MonoInst *inst;
3212         int alloc_size, pos, max_offset, i;
3213         guint8 *code;
3214
3215         cfg->code_size =  MAX (((MonoMethodNormal *)method)->header->code_size * 4, 256);
3216         code = cfg->native_code = g_malloc (cfg->code_size);
3217
3218         x86_push_reg (code, X86_EBP);
3219         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3220
3221         alloc_size = - cfg->stack_offset;
3222         pos = 0;
3223
3224         if (method->save_lmf) {
3225                 pos += sizeof (MonoLMF);
3226
3227                 /* save the current IP */
3228                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3229                 x86_push_imm (code, 0);
3230
3231                 /* save all caller saved regs */
3232                 x86_push_reg (code, X86_EBX);
3233                 x86_push_reg (code, X86_EDI);
3234                 x86_push_reg (code, X86_ESI);
3235                 x86_push_reg (code, X86_EBP);
3236
3237                 /* save method info */
3238                 x86_push_imm (code, method);
3239
3240                 /* get the address of lmf for the current thread */
3241                 /* 
3242                  * This is performance critical so we try to use some tricks to make
3243                  * it fast.
3244                  */
3245                 if (lmf_tls_offset != -1) {
3246                         /* Load lmf quicky using the GS register */
3247                         x86_prefix (code, X86_GS_PREFIX);
3248                         x86_mov_reg_mem (code, X86_EAX, 0, 4);
3249                         x86_mov_reg_membase (code, X86_EAX, X86_EAX, lmf_tls_offset, 4);
3250                 }
3251                 else {
3252                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3253                                                                  (gpointer)"mono_get_lmf_addr");
3254                         x86_call_code (code, 0);
3255                 }
3256
3257                 /* push lmf */
3258                 x86_push_reg (code, X86_EAX); 
3259                 /* push *lfm (previous_lmf) */
3260                 x86_push_membase (code, X86_EAX, 0);
3261                 /* *(lmf) = ESP */
3262                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3263         } else {
3264
3265                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3266                         x86_push_reg (code, X86_EBX);
3267                         pos += 4;
3268                 }
3269
3270                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3271                         x86_push_reg (code, X86_EDI);
3272                         pos += 4;
3273                 }
3274
3275                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3276                         x86_push_reg (code, X86_ESI);
3277                         pos += 4;
3278                 }
3279         }
3280
3281         alloc_size -= pos;
3282
3283         if (alloc_size) {
3284                 /* See mono_emit_stack_alloc */
3285 #ifdef PLATFORM_WIN32
3286                 guint32 remaining_size = alloc_size;
3287                 while (remaining_size >= 0x1000) {
3288                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3289                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3290                         remaining_size -= 0x1000;
3291                 }
3292                 if (remaining_size)
3293                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3294 #else
3295                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3296 #endif
3297         }
3298
3299         /* compute max_offset in order to use short forward jumps */
3300         max_offset = 0;
3301         if (cfg->opt & MONO_OPT_BRANCH) {
3302                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3303                         MonoInst *ins = bb->code;
3304                         bb->max_offset = max_offset;
3305
3306                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3307                                 max_offset += 6; 
3308
3309                         while (ins) {
3310                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3311                                 ins = ins->next;
3312                         }
3313                 }
3314         }
3315
3316         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3317                 code = mono_arch_instrument_prolog (cfg, enter_method, code, TRUE);
3318
3319         /* load arguments allocated to register from the stack */
3320         sig = method->signature;
3321         pos = 0;
3322
3323         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3324                 inst = cfg->varinfo [pos];
3325                 if (inst->opcode == OP_REGVAR) {
3326                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3327                         if (cfg->verbose_level > 2)
3328                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3329                 }
3330                 pos++;
3331         }
3332
3333         cfg->code_len = code - cfg->native_code;
3334
3335         return code;
3336 }
3337
3338 void
3339 mono_arch_emit_epilog (MonoCompile *cfg)
3340 {
3341         MonoJumpInfo *patch_info;
3342         MonoMethod *method = cfg->method;
3343         MonoMethodSignature *sig = method->signature;
3344         int pos;
3345         guint32 stack_to_pop;
3346         guint8 *code;
3347
3348         code = cfg->native_code + cfg->code_len;
3349
3350         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3351                 code = mono_arch_instrument_epilog (cfg, leave_method, code, TRUE);
3352
3353         /* the code restoring the registers must be kept in sync with CEE_JMP */
3354         pos = 0;
3355         
3356         if (method->save_lmf) {
3357                 pos = -sizeof (MonoLMF);
3358         } else {
3359                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3360                         pos -= 4;
3361                 }
3362                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3363                         pos -= 4;
3364                 }
3365                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3366                         pos -= 4;
3367                 }
3368         }
3369
3370         if (pos)
3371                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3372         
3373         if (method->save_lmf) {
3374                 /* ebx = previous_lmf */
3375                 x86_pop_reg (code, X86_EBX);
3376                 /* edi = lmf */
3377                 x86_pop_reg (code, X86_EDI);
3378                 /* *(lmf) = previous_lmf */
3379                 x86_mov_membase_reg (code, X86_EDI, 0, X86_EBX, 4);
3380
3381                 /* discard method info */
3382                 x86_pop_reg (code, X86_ESI);
3383
3384                 /* restore caller saved regs */
3385                 x86_pop_reg (code, X86_EBP);
3386                 x86_pop_reg (code, X86_ESI);
3387                 x86_pop_reg (code, X86_EDI);
3388                 x86_pop_reg (code, X86_EBX);
3389
3390         } else {
3391
3392                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3393                         x86_pop_reg (code, X86_ESI);
3394                 }
3395                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3396                         x86_pop_reg (code, X86_EDI);
3397                 }
3398                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3399                         x86_pop_reg (code, X86_EBX);
3400                 }
3401         }
3402
3403         x86_leave (code);
3404
3405         if (sig->call_convention == MONO_CALL_STDCALL) {
3406           MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3407
3408           stack_to_pop = arch_get_argument_info (sig, sig->param_count, arg_info);
3409         }
3410         else
3411         if (MONO_TYPE_ISSTRUCT (cfg->method->signature->ret))
3412           stack_to_pop = 4;
3413         else
3414           stack_to_pop = 0;
3415
3416         if (stack_to_pop)
3417                 x86_ret_imm (code, stack_to_pop);
3418         else
3419                 x86_ret (code);
3420
3421         /* add code to raise exceptions */
3422         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3423                 switch (patch_info->type) {
3424                 case MONO_PATCH_INFO_EXC:
3425                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3426                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);
3427                         x86_push_imm (code, patch_info->data.target);
3428                         mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_METHOD_REL, (gpointer)patch_info->ip.i);
3429                         x86_push_imm (code, patch_info->ip.i + cfg->native_code);
3430                         patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
3431                         patch_info->data.name = "mono_arch_throw_exception_by_name";
3432                         patch_info->ip.i = code - cfg->native_code;
3433                         x86_jump_code (code, 0);
3434                         break;
3435                 default:
3436                         /* do nothing */
3437                         break;
3438                 }
3439         }
3440
3441         cfg->code_len = code - cfg->native_code;
3442
3443         g_assert (cfg->code_len < cfg->code_size);
3444
3445 }
3446
3447 void
3448 mono_arch_flush_icache (guint8 *code, gint size)
3449 {
3450         /* not needed */
3451 }
3452
3453 /*
3454  * Support for fast access to the thread-local lmf structure using the GS
3455  * segment register on NPTL + kernel 2.6.x.
3456  */
3457
3458 static gboolean tls_offset_inited = FALSE;
3459
3460 #ifdef HAVE_KW_THREAD
3461 static __thread gpointer mono_lmf_addr;
3462 #endif
3463
3464 static gpointer
3465 mono_arch_get_lmf_addr (void)
3466 {
3467 #ifdef HAVE_KW_THREAD
3468         return mono_lmf_addr;
3469 #else
3470         g_assert_not_reached ();
3471         return NULL;
3472 #endif
3473 }
3474
3475 void
3476 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
3477 {
3478         if (!tls_offset_inited) {
3479                 guint8 *code;
3480
3481                 tls_offset_inited = TRUE;
3482
3483                 if (getenv ("MONO_NPTL")) {
3484                         /* 
3485                          * Determine the offset of mono_lfm_addr inside the TLS structures
3486                          * by disassembling the function above.
3487                          */
3488                         code = (guint8*)&mono_arch_get_lmf_addr;
3489
3490                         /* This is generated by gcc 3.3.2 */
3491                         if ((code [0] == 0x55) && (code [1] == 0x89) && (code [2] == 0xe5) &&
3492                                 (code [3] == 0x65) && (code [4] == 0xa1) && (code [5] == 0x00) &&
3493                                 (code [6] == 0x00) && (code [7] == 0x00) && (code [8] == 0x00) &&
3494                                 (code [9] == 0x8b) && (code [10] == 0x80)) {
3495                                 lmf_tls_offset = *(int*)&(code [11]);
3496                         }
3497                 }
3498         }               
3499
3500 #ifdef HAVE_KW_THREAD
3501         mono_lmf_addr = &tls->lmf;
3502 #endif
3503 }
3504
3505 void
3506 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3507 {
3508
3509         /* add the this argument */
3510         if (this_reg != -1) {
3511                 MonoInst *this;
3512                 MONO_INST_NEW (cfg, this, OP_OUTARG);
3513                 this->type = this_type;
3514                 this->sreg1 = this_reg;
3515                 mono_bblock_add_inst (cfg->cbb, this);
3516         }
3517
3518         if (vt_reg != -1) {
3519                 MonoInst *vtarg;
3520                 MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3521                 vtarg->type = STACK_MP;
3522                 vtarg->sreg1 = vt_reg;
3523                 mono_bblock_add_inst (cfg->cbb, vtarg);
3524         }
3525 }
3526
3527
3528 gint
3529 mono_arch_get_opcode_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3530 {
3531         if (cmethod->klass == mono_defaults.math_class) {
3532                 if (strcmp (cmethod->name, "Sin") == 0)
3533                         return OP_SIN;
3534                 else if (strcmp (cmethod->name, "Cos") == 0)
3535                         return OP_COS;
3536                 else if (strcmp (cmethod->name, "Tan") == 0)
3537                         return OP_TAN;
3538                 else if (strcmp (cmethod->name, "Atan") == 0)
3539                         return OP_ATAN;
3540                 else if (strcmp (cmethod->name, "Sqrt") == 0)
3541                         return OP_SQRT;
3542                 else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8)
3543                         return OP_ABS;
3544 #if 0
3545                 /* OP_FREM is not IEEE compatible */
3546                 else if (strcmp (cmethod->name, "IEEERemainder") == 0)
3547                         return OP_FREM;
3548 #endif
3549                 else
3550                         return -1;
3551         } else {
3552                 return -1;
3553         }
3554         return -1;
3555 }
3556
3557