Fri Nov 14 21:34:06 CET 2003 Paolo Molaro <lupus@ximian.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *
8  * (C) 2003 Ximian, Inc.
9  */
10 #include "mini.h"
11 #include <string.h>
12 #include <math.h>
13
14 #include <mono/metadata/appdomain.h>
15 #include <mono/metadata/debug-helpers.h>
16 #include <mono/metadata/profiler-private.h>
17 #include <mono/utils/mono-math.h>
18
19 #include "mini-x86.h"
20 #include "inssel.h"
21 #include "cpu-pentium.h"
22
23 static gint lmf_tls_offset = -1;
24
25 const char*
26 mono_arch_regname (int reg) {
27         switch (reg) {
28         case X86_EAX: return "%eax";
29         case X86_EBX: return "%ebx";
30         case X86_ECX: return "%ecx";
31         case X86_EDX: return "%edx";
32         case X86_ESP: return "%esp";
33         case X86_EBP: return "%ebp";
34         case X86_EDI: return "%edi";
35         case X86_ESI: return "%esi";
36         }
37         return "unknown";
38 }
39
40 typedef struct {
41         guint16 size;
42         guint16 offset;
43         guint8  pad;
44 } MonoJitArgumentInfo;
45
46 /*
47  * arch_get_argument_info:
48  * @csig:  a method signature
49  * @param_count: the number of parameters to consider
50  * @arg_info: an array to store the result infos
51  *
52  * Gathers information on parameters such as size, alignment and
53  * padding. arg_info should be large enought to hold param_count + 1 entries. 
54  *
55  * Returns the size of the activation frame.
56  */
57 static int
58 arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
59 {
60         int k, frame_size = 0;
61         int size, align, pad;
62         int offset = 8;
63
64         if (MONO_TYPE_ISSTRUCT (csig->ret)) { 
65                 frame_size += sizeof (gpointer);
66                 offset += 4;
67         }
68
69         arg_info [0].offset = offset;
70
71         if (csig->hasthis) {
72                 frame_size += sizeof (gpointer);
73                 offset += 4;
74         }
75
76         arg_info [0].size = frame_size;
77
78         for (k = 0; k < param_count; k++) {
79                 
80                 if (csig->pinvoke)
81                         size = mono_type_native_stack_size (csig->params [k], &align);
82                 else
83                         size = mono_type_stack_size (csig->params [k], &align);
84
85                 /* ignore alignment for now */
86                 align = 1;
87
88                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
89                 arg_info [k].pad = pad;
90                 frame_size += size;
91                 arg_info [k + 1].pad = 0;
92                 arg_info [k + 1].size = size;
93                 offset += pad;
94                 arg_info [k + 1].offset = offset;
95                 offset += size;
96         }
97
98         align = MONO_ARCH_FRAME_ALIGNMENT;
99         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
100         arg_info [k].pad = pad;
101
102         return frame_size;
103 }
104
/* current nesting depth of the method trace output */
static int indent_level = 0;

/*
 * Prints one ". " marker per current nesting level (nothing when the level
 * is zero or negative), then adjusts the level by @diff.
 */
static void indent (int diff) {
        int depth;

        for (depth = 0; depth < indent_level; depth++)
                printf (". ");

        indent_level += diff;
}
114
115 static void
116 enter_method (MonoMethod *method, char *ebp)
117 {
118         int i, j;
119         MonoClass *class;
120         MonoObject *o;
121         MonoJitArgumentInfo *arg_info;
122         MonoMethodSignature *sig;
123         char *fname;
124
125         fname = mono_method_full_name (method, TRUE);
126         indent (1);
127         printf ("ENTER: %s(", fname);
128         g_free (fname);
129         
130         if (((int)ebp & (MONO_ARCH_FRAME_ALIGNMENT - 1)) != 0) {
131                 g_error ("unaligned stack detected (%p)", ebp);
132         }
133
134         sig = method->signature;
135
136         arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
137
138         arch_get_argument_info (sig, sig->param_count, arg_info);
139
140         if (MONO_TYPE_ISSTRUCT (method->signature->ret)) {
141                 g_assert (!method->signature->ret->byref);
142
143                 printf ("VALUERET:%p, ", *((gpointer *)(ebp + 8)));
144         }
145
146         if (method->signature->hasthis) {
147                 gpointer *this = (gpointer *)(ebp + arg_info [0].offset);
148                 if (method->klass->valuetype) {
149                         printf ("value:%p, ", *this);
150                 } else {
151                         o = *((MonoObject **)this);
152
153                         if (o) {
154                                 class = o->vtable->klass;
155
156                                 if (class == mono_defaults.string_class) {
157                                         printf ("this:[STRING:%p:%s], ", o, mono_string_to_utf8 ((MonoString *)o));
158                                 } else {
159                                         printf ("this:%p[%s.%s %s], ", o, class->name_space, class->name, o->vtable->domain->friendly_name);
160                                 }
161                         } else 
162                                 printf ("this:NULL, ");
163                 }
164         }
165
166         for (i = 0; i < method->signature->param_count; ++i) {
167                 gpointer *cpos = (gpointer *)(ebp + arg_info [i + 1].offset);
168                 int size = arg_info [i + 1].size;
169
170                 MonoType *type = method->signature->params [i];
171                 
172                 if (type->byref) {
173                         printf ("[BYREF:%p], ", *cpos); 
174                 } else switch (type->type) {
175                         
176                 case MONO_TYPE_I:
177                 case MONO_TYPE_U:
178                         printf ("%p, ", (gpointer)*((int *)(cpos)));
179                         break;
180                 case MONO_TYPE_BOOLEAN:
181                 case MONO_TYPE_CHAR:
182                 case MONO_TYPE_I1:
183                 case MONO_TYPE_U1:
184                 case MONO_TYPE_I2:
185                 case MONO_TYPE_U2:
186                 case MONO_TYPE_I4:
187                 case MONO_TYPE_U4:
188                         printf ("%d, ", *((int *)(cpos)));
189                         break;
190                 case MONO_TYPE_STRING: {
191                         MonoString *s = *((MonoString **)cpos);
192                         if (s) {
193                                 g_assert (((MonoObject *)s)->vtable->klass == mono_defaults.string_class);
194                                 printf ("[STRING:%p:%s], ", s, mono_string_to_utf8 (s));
195                         } else 
196                                 printf ("[STRING:null], ");
197                         break;
198                 }
199                 case MONO_TYPE_CLASS:
200                 case MONO_TYPE_OBJECT: {
201                         o = *((MonoObject **)cpos);
202                         if (o) {
203                                 class = o->vtable->klass;
204                     
205                                 if (class == mono_defaults.string_class) {
206                                         printf ("[STRING:%p:%s], ", o, mono_string_to_utf8 ((MonoString *)o));
207                                 } else if (class == mono_defaults.int32_class) {
208                                         printf ("[INT32:%p:%d], ", o, *(gint32 *)((char *)o + sizeof (MonoObject)));
209                                 } else
210                                         printf ("[%s.%s:%p], ", class->name_space, class->name, o);
211                         } else {
212                                 printf ("%p, ", *((gpointer *)(cpos)));                         
213                         }
214                         break;
215                 }
216                 case MONO_TYPE_PTR:
217                 case MONO_TYPE_FNPTR:
218                 case MONO_TYPE_ARRAY:
219                 case MONO_TYPE_SZARRAY:
220                         printf ("%p, ", *((gpointer *)(cpos)));
221                         break;
222                 case MONO_TYPE_I8:
223                 case MONO_TYPE_U8:
224                         printf ("0x%016llx, ", *((gint64 *)(cpos)));
225                         break;
226                 case MONO_TYPE_R4:
227                         printf ("%f, ", *((float *)(cpos)));
228                         break;
229                 case MONO_TYPE_R8:
230                         printf ("%f, ", *((double *)(cpos)));
231                         break;
232                 case MONO_TYPE_VALUETYPE: 
233                         printf ("[");
234                         for (j = 0; j < size; j++)
235                                 printf ("%02x,", *((guint8*)cpos +j));
236                         printf ("], ");
237                         break;
238                 default:
239                         printf ("XX, ");
240                 }
241         }
242
243         printf (")\n");
244 }
245
246 static void
247 leave_method (MonoMethod *method, ...)
248 {
249         MonoType *type;
250         char *fname;
251         va_list ap;
252
253         va_start(ap, method);
254
255         fname = mono_method_full_name (method, TRUE);
256         indent (-1);
257         printf ("LEAVE: %s", fname);
258         g_free (fname);
259
260         type = method->signature->ret;
261
262 handle_enum:
263         switch (type->type) {
264         case MONO_TYPE_VOID:
265                 break;
266         case MONO_TYPE_BOOLEAN: {
267                 int eax = va_arg (ap, int);
268                 if (eax)
269                         printf ("TRUE:%d", eax);
270                 else 
271                         printf ("FALSE");
272                         
273                 break;
274         }
275         case MONO_TYPE_CHAR:
276         case MONO_TYPE_I1:
277         case MONO_TYPE_U1:
278         case MONO_TYPE_I2:
279         case MONO_TYPE_U2:
280         case MONO_TYPE_I4:
281         case MONO_TYPE_U4:
282         case MONO_TYPE_I:
283         case MONO_TYPE_U: {
284                 int eax = va_arg (ap, int);
285                 printf ("EAX=%d", eax);
286                 break;
287         }
288         case MONO_TYPE_STRING: {
289                 MonoString *s = va_arg (ap, MonoString *);
290 ;
291                 if (s) {
292                         g_assert (((MonoObject *)s)->vtable->klass == mono_defaults.string_class);
293                         printf ("[STRING:%p:%s]", s, mono_string_to_utf8 (s));
294                 } else 
295                         printf ("[STRING:null], ");
296                 break;
297         }
298         case MONO_TYPE_CLASS: 
299         case MONO_TYPE_OBJECT: {
300                 MonoObject *o = va_arg (ap, MonoObject *);
301
302                 if (o) {
303                         if (o->vtable->klass == mono_defaults.boolean_class) {
304                                 printf ("[BOOLEAN:%p:%d]", o, *((guint8 *)o + sizeof (MonoObject)));            
305                         } else if  (o->vtable->klass == mono_defaults.int32_class) {
306                                 printf ("[INT32:%p:%d]", o, *((gint32 *)((char *)o + sizeof (MonoObject))));    
307                         } else if  (o->vtable->klass == mono_defaults.int64_class) {
308                                 printf ("[INT64:%p:%lld]", o, *((gint64 *)((char *)o + sizeof (MonoObject))));  
309                         } else
310                                 printf ("[%s.%s:%p]", o->vtable->klass->name_space, o->vtable->klass->name, o);
311                 } else
312                         printf ("[OBJECT:%p]", o);
313                
314                 break;
315         }
316         case MONO_TYPE_PTR:
317         case MONO_TYPE_FNPTR:
318         case MONO_TYPE_ARRAY:
319         case MONO_TYPE_SZARRAY: {
320                 gpointer p = va_arg (ap, gpointer);
321                 printf ("EAX=%p", p);
322                 break;
323         }
324         case MONO_TYPE_I8: {
325                 gint64 l =  va_arg (ap, gint64);
326                 printf ("EAX/EDX=0x%16llx", l);
327                 break;
328         }
329         case MONO_TYPE_U8: {
330                 gint64 l =  va_arg (ap, gint64);
331                 printf ("EAX/EDX=0x%16llx", l);
332                 break;
333         }
334         case MONO_TYPE_R8: {
335                 double f = va_arg (ap, double);
336                 printf ("FP=%f\n", f);
337                 break;
338         }
339         case MONO_TYPE_VALUETYPE: 
340                 if (type->data.klass->enumtype) {
341                         type = type->data.klass->enum_basetype;
342                         goto handle_enum;
343                 } else {
344                         guint8 *p = va_arg (ap, gpointer);
345                         int j, size, align;
346                         size = mono_type_size (type, &align);
347                         printf ("[");
348                         for (j = 0; p && j < size; j++)
349                                 printf ("%02x,", p [j]);
350                         printf ("]");
351                 }
352                 break;
353         default:
354                 printf ("(unknown return type %x)", method->signature->ret->type);
355         }
356
357         printf ("\n");
358 }
359
360 static const guchar cpuid_impl [] = {
361         0x55,                           /* push   %ebp */
362         0x89, 0xe5,                     /* mov    %esp,%ebp */
363         0x53,                           /* push   %ebx */
364         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
365         0x0f, 0xa2,                     /* cpuid   */
366         0x50,                           /* push   %eax */
367         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
368         0x89, 0x18,                     /* mov    %ebx,(%eax) */
369         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
370         0x89, 0x08,                     /* mov    %ecx,(%eax) */
371         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
372         0x89, 0x10,                     /* mov    %edx,(%eax) */
373         0x58,                           /* pop    %eax */
374         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
375         0x89, 0x02,                     /* mov    %eax,(%edx) */
376         0x5b,                           /* pop    %ebx */
377         0xc9,                           /* leave   */
378         0xc3,                           /* ret     */
379 };
380
381 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
382
383 static int 
384 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
385 {
386         int have_cpuid = 0;
387         __asm__  __volatile__ (
388                 "pushfl\n"
389                 "popl %%eax\n"
390                 "movl %%eax, %%edx\n"
391                 "xorl $0x200000, %%eax\n"
392                 "pushl %%eax\n"
393                 "popfl\n"
394                 "pushfl\n"
395                 "popl %%eax\n"
396                 "xorl %%edx, %%eax\n"
397                 "andl $0x200000, %%eax\n"
398                 "movl %%eax, %0"
399                 : "=r" (have_cpuid)
400                 :
401                 : "%eax", "%edx"
402         );
403
404         if (have_cpuid) {
405                 CpuidFunc func = (CpuidFunc)cpuid_impl;
406                 func (id, p_eax, p_ebx, p_ecx, p_edx);
407                 /*
408                  * We use this approach because of issues with gcc and pic code, see:
409                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
410                 __asm__ __volatile__ ("cpuid"
411                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
412                         : "a" (id));
413                 */
414                 return 1;
415         }
416         return 0;
417 }
418
419 /*
420  * Initialize the cpu to execute managed code.
421  */
422 void
423 mono_arch_cpu_init (void)
424 {
425         guint16 fpcw;
426
427         /* spec compliance requires running with double precision */
428         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
429         fpcw &= ~X86_FPCW_PRECC_MASK;
430         fpcw |= X86_FPCW_PREC_DOUBLE;
431         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
432         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
433
434 }
435
436 /*
437  * This function returns the optimizations supported on this cpu.
438  */
439 guint32
440 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
441 {
442         int eax, ebx, ecx, edx;
443         guint32 opts = 0;
444         
445         *exclude_mask = 0;
446         /* Feature Flags function, flags returned in EDX. */
447         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
448                 if (edx & (1 << 15)) {
449                         opts |= MONO_OPT_CMOV;
450                         if (edx & 1)
451                                 opts |= MONO_OPT_FCMOV;
452                         else
453                                 *exclude_mask |= MONO_OPT_FCMOV;
454                 } else
455                         *exclude_mask |= MONO_OPT_CMOV;
456         }
457         return opts;
458 }
459
460 static gboolean
461 is_regsize_var (MonoType *t) {
462         if (t->byref)
463                 return TRUE;
464         switch (t->type) {
465         case MONO_TYPE_I4:
466         case MONO_TYPE_U4:
467         case MONO_TYPE_I:
468         case MONO_TYPE_U:
469                 return TRUE;
470         case MONO_TYPE_OBJECT:
471         case MONO_TYPE_STRING:
472         case MONO_TYPE_CLASS:
473         case MONO_TYPE_SZARRAY:
474         case MONO_TYPE_ARRAY:
475                 return TRUE;
476         case MONO_TYPE_VALUETYPE:
477                 if (t->data.klass->enumtype)
478                         return is_regsize_var (t->data.klass->enum_basetype);
479                 return FALSE;
480         }
481         return FALSE;
482 }
483
484 GList *
485 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
486 {
487         GList *vars = NULL;
488         int i;
489
490         for (i = 0; i < cfg->num_varinfo; i++) {
491                 MonoInst *ins = cfg->varinfo [i];
492                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
493
494                 /* unused vars */
495                 if (vmv->range.first_use.abs_pos > vmv->range.last_use.abs_pos)
496                         continue;
497
498                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
499                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
500                         continue;
501
502                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
503                  * 8bit quantities in caller saved registers on x86 */
504                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
505                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
506                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
507                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
508                         g_assert (i == vmv->idx);
509                         vars = mono_varlist_insert_sorted (cfg, vars, vmv, FALSE);
510                 }
511         }
512
513         return vars;
514 }
515
516 GList *
517 mono_arch_get_global_int_regs (MonoCompile *cfg)
518 {
519         GList *regs = NULL;
520
521         /* we can use 3 registers for global allocation */
522         regs = g_list_prepend (regs, (gpointer)X86_EBX);
523         regs = g_list_prepend (regs, (gpointer)X86_ESI);
524         regs = g_list_prepend (regs, (gpointer)X86_EDI);
525
526         return regs;
527 }
528  
529 /*
530  * Set var information according to the calling convention. X86 version.
531  * The locals var stuff should most likely be split in another method.
532  */
533 void
534 mono_arch_allocate_vars (MonoCompile *m)
535 {
536         MonoMethodSignature *sig;
537         MonoMethodHeader *header;
538         MonoInst *inst;
539         int i, offset, size, align, curinst;
540
541         header = ((MonoMethodNormal *)m->method)->header;
542
543         sig = m->method->signature;
544
545         offset = 8;
546         curinst = 0;
547         if (MONO_TYPE_ISSTRUCT (sig->ret)) {
548                 m->ret->opcode = OP_REGOFFSET;
549                 m->ret->inst_basereg = X86_EBP;
550                 m->ret->inst_offset = offset;
551                 offset += sizeof (gpointer);
552         } else {
553                 /* FIXME: handle long and FP values */
554                 switch (sig->ret->type) {
555                 case MONO_TYPE_VOID:
556                         break;
557                 default:
558                         m->ret->opcode = OP_REGVAR;
559                         m->ret->inst_c0 = X86_EAX;
560                         break;
561                 }
562         }
563         if (sig->hasthis) {
564                 inst = m->varinfo [curinst];
565                 if (inst->opcode != OP_REGVAR) {
566                         inst->opcode = OP_REGOFFSET;
567                         inst->inst_basereg = X86_EBP;
568                 }
569                 inst->inst_offset = offset;
570                 offset += sizeof (gpointer);
571                 curinst++;
572         }
573
574         if (sig->call_convention == MONO_CALL_VARARG) {
575                 m->sig_cookie = offset;
576                 offset += sizeof (gpointer);
577         }
578
579         for (i = 0; i < sig->param_count; ++i) {
580                 inst = m->varinfo [curinst];
581                 if (inst->opcode != OP_REGVAR) {
582                         inst->opcode = OP_REGOFFSET;
583                         inst->inst_basereg = X86_EBP;
584                 }
585                 inst->inst_offset = offset;
586                 size = mono_type_size (sig->params [i], &align);
587                 size += 4 - 1;
588                 size &= ~(4 - 1);
589                 offset += size;
590                 curinst++;
591         }
592
593         offset = 0;
594
595         /* reserve space to save LMF and caller saved registers */
596
597         if (m->method->save_lmf) {
598                 offset += sizeof (MonoLMF);
599         } else {
600                 if (m->used_int_regs & (1 << X86_EBX)) {
601                         offset += 4;
602                 }
603
604                 if (m->used_int_regs & (1 << X86_EDI)) {
605                         offset += 4;
606                 }
607
608                 if (m->used_int_regs & (1 << X86_ESI)) {
609                         offset += 4;
610                 }
611         }
612
613         for (i = curinst; i < m->num_varinfo; ++i) {
614                 inst = m->varinfo [i];
615
616                 if ((inst->flags & MONO_INST_IS_DEAD) || inst->opcode == OP_REGVAR)
617                         continue;
618
619                 /* inst->unused indicates native sized value types, this is used by the
620                 * pinvoke wrappers when they call functions returning structure */
621                 if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
622                         size = mono_class_native_size (inst->inst_vtype->data.klass, &align);
623                 else
624                         size = mono_type_size (inst->inst_vtype, &align);
625
626                 offset += size;
627                 offset += align - 1;
628                 offset &= ~(align - 1);
629                 inst->opcode = OP_REGOFFSET;
630                 inst->inst_basereg = X86_EBP;
631                 inst->inst_offset = -offset;
632                 //g_print ("allocating local %d to %d\n", i, -offset);
633         }
634         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
635         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
636
637         /* change sign? */
638         m->stack_offset = -offset;
639 }
640
641 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
642  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
643  */
644
645 /* 
646  * take the arguments and generate the arch-specific
647  * instructions to properly call the function in call.
648  * This includes pushing, moving arguments to the right register
649  * etc.
650  * Issue: who does the spilling if needed, and when?
651  */
652 MonoCallInst*
653 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
654         MonoInst *arg, *in;
655         MonoMethodSignature *sig;
656         int i, n, stack_size, type;
657         MonoType *ptype;
658
659         stack_size = 0;
660         /* add the vararg cookie before the non-implicit args */
661         if (call->signature->call_convention == MONO_CALL_VARARG) {
662                 MonoInst *sig_arg;
663                 MONO_INST_NEW (cfg, arg, OP_OUTARG);
664                 MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
665                 sig_arg->inst_p0 = call->signature;
666                 arg->inst_left = sig_arg;
667                 arg->type = STACK_PTR;
668                 /* prepend, so they get reversed */
669                 arg->next = call->out_args;
670                 call->out_args = arg;
671                 stack_size += sizeof (gpointer);
672         }
673         sig = call->signature;
674         n = sig->param_count + sig->hasthis;
675
676         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
677                 stack_size += sizeof (gpointer);
678         for (i = 0; i < n; ++i) {
679                 if (is_virtual && i == 0) {
680                         /* the argument will be attached to the call instrucion */
681                         in = call->args [i];
682                         stack_size += 4;
683                 } else {
684                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
685                         in = call->args [i];
686                         arg->cil_code = in->cil_code;
687                         arg->inst_left = in;
688                         arg->type = in->type;
689                         /* prepend, so they get reversed */
690                         arg->next = call->out_args;
691                         call->out_args = arg;
692                         if (i >= sig->hasthis) {
693                                 ptype = sig->params [i - sig->hasthis];
694                                 if (ptype->byref)
695                                         type = MONO_TYPE_U;
696                                 else
697                                         type = ptype->type;
698 handle_enum:
699                                 /* FIXME: validate arguments... */
700                                 switch (type) {
701                                 case MONO_TYPE_I:
702                                 case MONO_TYPE_U:
703                                 case MONO_TYPE_BOOLEAN:
704                                 case MONO_TYPE_CHAR:
705                                 case MONO_TYPE_I1:
706                                 case MONO_TYPE_U1:
707                                 case MONO_TYPE_I2:
708                                 case MONO_TYPE_U2:
709                                 case MONO_TYPE_I4:
710                                 case MONO_TYPE_U4:
711                                 case MONO_TYPE_STRING:
712                                 case MONO_TYPE_CLASS:
713                                 case MONO_TYPE_OBJECT:
714                                 case MONO_TYPE_PTR:
715                                 case MONO_TYPE_FNPTR:
716                                 case MONO_TYPE_ARRAY:
717                                 case MONO_TYPE_SZARRAY:
718                                         stack_size += 4;
719                                         break;
720                                 case MONO_TYPE_I8:
721                                 case MONO_TYPE_U8:
722                                         stack_size += 8;
723                                         break;
724                                 case MONO_TYPE_R4:
725                                         stack_size += 4;
726                                         arg->opcode = OP_OUTARG_R4;
727                                         break;
728                                 case MONO_TYPE_R8:
729                                         stack_size += 8;
730                                         arg->opcode = OP_OUTARG_R8;
731                                         break;
732                                 case MONO_TYPE_VALUETYPE:
733                                         if (MONO_TYPE_ISSTRUCT (ptype)) {
734                                                 int size;
735                                                 if (sig->pinvoke) 
736                                                         size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
737                                                 else 
738                                                         size = mono_type_stack_size (&in->klass->byval_arg, NULL);
739
740                                                 stack_size += size;
741                                                 arg->opcode = OP_OUTARG_VT;
742                                                 arg->klass = in->klass;
743                                                 arg->unused = sig->pinvoke;
744                                                 arg->inst_imm = size; 
745                                         } else {
746                                                 type = ptype->data.klass->enum_basetype->type;
747                                                 goto handle_enum;
748                                         }
749                                         break;
750                                 case MONO_TYPE_TYPEDBYREF:
751                                         stack_size += sizeof (MonoTypedRef);
752                                         arg->opcode = OP_OUTARG_VT;
753                                         arg->klass = in->klass;
754                                         arg->unused = sig->pinvoke;
755                                         arg->inst_imm = sizeof (MonoTypedRef); 
756                                         break;
757                                 case MONO_TYPE_GENERICINST:
758                                         type = ptype->data.generic_inst->generic_type->type;
759                                         goto handle_enum;
760
761                                 default:
762                                         g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
763                                 }
764                         } else {
765                                 /* the this argument */
766                                 stack_size += 4;
767                         }
768                 }
769         }
770         /* if the function returns a struct, the called method already does a ret $0x4 */
771         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
772                 stack_size -= 4;
773         call->stack_usage = stack_size;
774         /* 
775          * should set more info in call, such as the stack space
776          * used by the args that needs to be added back to esp
777          */
778
779         return call;
780 }
781
782 /*
783  * Allow tracing to work with this interface (with an optional argument)
784  */
785
786 /*
787  * This may be needed on some archs or for debugging support.
788  */
789 void
790 mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
791 {
792         /* no stack room needed now (may be needed for FASTCALL-trace support) */
793         *stack = 0;
794         /* split prolog-epilog requirements? */
795         *code = 50; /* max bytes needed: check this number */
796 }
797
798 void*
799 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
800 {
801         guchar *code = p;
802
803         /* if some args are passed in registers, we need to save them here */
804         x86_push_reg (code, X86_EBP);
805         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
806         x86_push_imm (code, cfg->method);
807         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
808         x86_call_code (code, 0);
809         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
810
811         return code;
812 }
813
/*
 * How mono_arch_instrument_epilog preserves the method's return value
 * around the call to the instrumentation function.
 */
enum {
        SAVE_NONE = 0,  /* nothing to preserve */
        SAVE_STRUCT,    /* valuetype return: nothing saved, see the epilog */
        SAVE_EAX,       /* value in EAX */
        SAVE_EAX_EDX,   /* 64 bit value in EDX:EAX */
        SAVE_FP         /* floating point value on the x87 stack */
};
821
822 void*
823 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
824 {
825         guchar *code = p;
826         int arg_size = 0, save_mode = SAVE_NONE;
827         MonoMethod *method = cfg->method;
828         int rtype = method->signature->ret->type;
829         
830 handle_enum:
831         switch (rtype) {
832         case MONO_TYPE_VOID:
833                 /* special case string .ctor icall */
834                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
835                         save_mode = SAVE_EAX;
836                 else
837                         save_mode = SAVE_NONE;
838                 break;
839         case MONO_TYPE_I8:
840         case MONO_TYPE_U8:
841                 save_mode = SAVE_EAX_EDX;
842                 break;
843         case MONO_TYPE_R4:
844         case MONO_TYPE_R8:
845                 save_mode = SAVE_FP;
846                 break;
847         case MONO_TYPE_VALUETYPE:
848                 if (method->signature->ret->data.klass->enumtype) {
849                         rtype = method->signature->ret->data.klass->enum_basetype->type;
850                         goto handle_enum;
851                 }
852                 save_mode = SAVE_STRUCT;
853                 break;
854         default:
855                 save_mode = SAVE_EAX;
856                 break;
857         }
858
859         switch (save_mode) {
860         case SAVE_EAX_EDX:
861                 x86_push_reg (code, X86_EDX);
862                 x86_push_reg (code, X86_EAX);
863                 if (enable_arguments) {
864                         x86_push_reg (code, X86_EDX);
865                         x86_push_reg (code, X86_EAX);
866                         arg_size = 8;
867                 }
868                 break;
869         case SAVE_EAX:
870                 x86_push_reg (code, X86_EAX);
871                 if (enable_arguments) {
872                         x86_push_reg (code, X86_EAX);
873                         arg_size = 4;
874                 }
875                 break;
876         case SAVE_FP:
877                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
878                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
879                 if (enable_arguments) {
880                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
881                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
882                         arg_size = 8;
883                 }
884                 break;
885         case SAVE_STRUCT:
886                 if (enable_arguments) {
887                         x86_push_membase (code, X86_EBP, 8);
888                         arg_size = 4;
889                 }
890                 break;
891         case SAVE_NONE:
892         default:
893                 break;
894         }
895
896
897         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
898         x86_push_imm (code, method);
899         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
900         x86_call_code (code, 0);
901         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
902
903         switch (save_mode) {
904         case SAVE_EAX_EDX:
905                 x86_pop_reg (code, X86_EAX);
906                 x86_pop_reg (code, X86_EDX);
907                 break;
908         case SAVE_EAX:
909                 x86_pop_reg (code, X86_EAX);
910                 break;
911         case SAVE_FP:
912                 x86_fld_membase (code, X86_ESP, 0, TRUE);
913                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
914                 break;
915         case SAVE_NONE:
916         default:
917                 break;
918         }
919
920         return code;
921 }
922
/*
 * EMIT_COND_BRANCH:
 * @ins: the branch instruction
 * @cond: x86 condition code
 * @sign: passed through to the x86_branch* emitters
 *
 * Emits the conditional branch for @ins. The target is either a label
 * (MONO_INST_BRLABEL set, taken from ins->inst_i0) or the basic block
 * ins->inst_true_bb. When the target offset is non-zero the branch is
 * emitted directly against it; otherwise a patch is recorded and a
 * placeholder branch is emitted. The 8 bit form is only used for basic
 * block targets when MONO_OPT_BRANCH is enabled and the estimated distance
 * fits in an imm8.
 *
 * Expands to a bare if/else statement and relies on `cfg', `code' and
 * `cpos' being in scope at the point of use.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (!(ins->flags & MONO_INST_BRLABEL)) { \
        if (!ins->inst_true_bb->native_offset) { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if (!(cfg->opt & MONO_OPT_BRANCH) || \
                    !x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) { \
                        x86_branch32 (code, cond, 0, sign); \
                } else { \
                        x86_branch8 (code, cond, 0, sign); \
                } \
        } else { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } \
} else { \
        if (!ins->inst_i0->inst_c0) { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                x86_branch32 (code, cond, 0, sign); \
        } else { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } \
}
943
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 * @cc: x86 condition code that triggers the exception
 * @is_signed: passed through to x86_branch32
 * @name: exception name stored in the MONO_PATCH_INFO_EXC patch
 *
 * Emits a 32 bit conditional branch with a zero placeholder target and
 * records an exception patch at its offset. Relies on `cfg' and `code'
 * being in scope.
 *
 * Note: the definition itself ends in a semicolon, so "MACRO (...);"
 * expands to two statements. Do not use it as the unbraced body of an
 * if that is followed by an else.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cc,is_signed,name)               \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,  \
                                    MONO_PATCH_INFO_EXC, name);     \
                x86_branch32 (code, cc, 0, is_signed);              \
        } while (0); 
951
/*
 * EMIT_FPCOMPARE:
 * @ip: native code emission pointer (advanced by the emitters)
 *
 * Emits an x87 compare (x86_fcompp) followed by a store of the FPU status
 * word (x86_fnstsw).
 *
 * Note: the definition itself ends in a semicolon, so "MACRO (...);"
 * expands to two statements.
 */
#define EMIT_FPCOMPARE(ip) do { \
        x86_fcompp (ip); \
        x86_fnstsw (ip); \
} while (0); 
956
957 static void
958 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
959 {
960         MonoInst *ins, *last_ins = NULL;
961         ins = bb->code;
962
963         while (ins) {
964
965                 switch (ins->opcode) {
966                 case OP_ICONST:
967                         /* reg = 0 -> XOR (reg, reg) */
968                         /* XOR sets cflags on x86, so we cant do it always */
969                         if (ins->inst_c0 == 0 && ins->next &&
970                             (ins->next->opcode == CEE_BR)) { 
971                                 ins->opcode = CEE_XOR;
972                                 ins->sreg1 = ins->dreg;
973                                 ins->sreg2 = ins->dreg;
974                         }
975                         break;
976                 case OP_MUL_IMM: 
977                         /* remove unnecessary multiplication with 1 */
978                         if (ins->inst_imm == 1) {
979                                 if (ins->dreg != ins->sreg1) {
980                                         ins->opcode = OP_MOVE;
981                                 } else {
982                                         last_ins->next = ins->next;                             
983                                         ins = ins->next;                                
984                                         continue;
985                                 }
986                         }
987                         break;
988                 case OP_COMPARE_IMM:
989                         /* OP_COMPARE_IMM (reg, 0) --> OP_X86_TEST_NULL (reg) */
990                         if (ins->inst_imm == 0 && ins->next &&
991                             (ins->next->opcode == CEE_BEQ || ins->next->opcode == CEE_BNE_UN ||
992                              ins->next->opcode == OP_CEQ)) {
993                                 ins->opcode = OP_X86_TEST_NULL;
994                         }     
995                         break;
996                 case OP_LOAD_MEMBASE:
997                 case OP_LOADI4_MEMBASE:
998                         /* 
999                          * OP_STORE_MEMBASE_REG reg, offset(basereg) 
1000                          * OP_LOAD_MEMBASE offset(basereg), reg
1001                          */
1002                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
1003                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1004                             ins->inst_basereg == last_ins->inst_destbasereg &&
1005                             ins->inst_offset == last_ins->inst_offset) {
1006                                 if (ins->dreg == last_ins->sreg1) {
1007                                         last_ins->next = ins->next;                             
1008                                         ins = ins->next;                                
1009                                         continue;
1010                                 } else {
1011                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1012                                         ins->opcode = OP_MOVE;
1013                                         ins->sreg1 = last_ins->sreg1;
1014                                 }
1015
1016                         /* 
1017                          * Note: reg1 must be different from the basereg in the second load
1018                          * OP_LOAD_MEMBASE offset(basereg), reg1
1019                          * OP_LOAD_MEMBASE offset(basereg), reg2
1020                          * -->
1021                          * OP_LOAD_MEMBASE offset(basereg), reg1
1022                          * OP_MOVE reg1, reg2
1023                          */
1024                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
1025                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
1026                               ins->inst_basereg != last_ins->dreg &&
1027                               ins->inst_basereg == last_ins->inst_basereg &&
1028                               ins->inst_offset == last_ins->inst_offset) {
1029
1030                                 if (ins->dreg == last_ins->dreg) {
1031                                         last_ins->next = ins->next;                             
1032                                         ins = ins->next;                                
1033                                         continue;
1034                                 } else {
1035                                         ins->opcode = OP_MOVE;
1036                                         ins->sreg1 = last_ins->dreg;
1037                                 }
1038
1039                                 //g_assert_not_reached ();
1040
1041 #if 0
1042                         /* 
1043                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1044                          * OP_LOAD_MEMBASE offset(basereg), reg
1045                          * -->
1046                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
1047                          * OP_ICONST reg, imm
1048                          */
1049                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
1050                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
1051                                    ins->inst_basereg == last_ins->inst_destbasereg &&
1052                                    ins->inst_offset == last_ins->inst_offset) {
1053                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1054                                 ins->opcode = OP_ICONST;
1055                                 ins->inst_c0 = last_ins->inst_imm;
1056                                 g_assert_not_reached (); // check this rule
1057 #endif
1058                         }
1059                         break;
1060                 case OP_LOADU1_MEMBASE:
1061                 case OP_LOADI1_MEMBASE:
1062                   /*
1063                    * FIXME: Missing explanation
1064                    */
1065                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1066                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1067                                         ins->inst_offset == last_ins->inst_offset) {
1068                                 if (ins->dreg == last_ins->sreg1) {
1069                                         last_ins->next = ins->next;                             
1070                                         ins = ins->next;                                
1071                                         continue;
1072                                 } else {
1073                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1074                                         ins->opcode = OP_MOVE;
1075                                         ins->sreg1 = last_ins->sreg1;
1076                                 }
1077                         }
1078                         break;
1079                 case OP_LOADU2_MEMBASE:
1080                 case OP_LOADI2_MEMBASE:
1081                   /*
1082                    * FIXME: Missing explanation
1083                    */
1084                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1085                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1086                                         ins->inst_offset == last_ins->inst_offset) {
1087                                 if (ins->dreg == last_ins->sreg1) {
1088                                         last_ins->next = ins->next;                             
1089                                         ins = ins->next;                                
1090                                         continue;
1091                                 } else {
1092                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1093                                         ins->opcode = OP_MOVE;
1094                                         ins->sreg1 = last_ins->sreg1;
1095                                 }
1096                         }
1097                         break;
1098                 case CEE_CONV_I4:
1099                 case CEE_CONV_U4:
1100                 case OP_MOVE:
1101                         /* 
1102                          * OP_MOVE reg, reg 
1103                          */
1104                         if (ins->dreg == ins->sreg1) {
1105                                 if (last_ins)
1106                                         last_ins->next = ins->next;                             
1107                                 ins = ins->next;
1108                                 continue;
1109                         }
1110                         /* 
1111                          * OP_MOVE sreg, dreg 
1112                          * OP_MOVE dreg, sreg
1113                          */
1114                         if (last_ins && last_ins->opcode == OP_MOVE &&
1115                             ins->sreg1 == last_ins->dreg &&
1116                             ins->dreg == last_ins->sreg1) {
1117                                 last_ins->next = ins->next;                             
1118                                 ins = ins->next;                                
1119                                 continue;
1120                         }
1121                         break;
1122                 }
1123                 last_ins = ins;
1124                 ins = ins->next;
1125         }
1126         bb->last_ins = last_ins;
1127 }
1128
1129 static const int 
1130 branch_cc_table [] = {
1131         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1132         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1133         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1134 };
1135
/*
 * DEBUG(a): evaluate `a' only when cfg->verbose_level is above 1.
 * Requires a `cfg' in scope at the point of use.
 *
 * Written as "if (!cond) { } else a" rather than "if (cond) a" so that the
 * macro already owns its else branch and cannot capture an `else' that
 * follows the invocation in the caller.
 * To compile the tracing out entirely, define DEBUG(a) as empty instead.
 */
#define DEBUG(a) if (!(cfg->verbose_level > 1)) { } else a
/*
 * reg_is_freeable(r): true when r is in the range 0..7 and X86_IS_CALLEE
 * holds for it. Evaluates its argument more than once.
 */
#define reg_is_freeable(r) ((r) >= 0 && (r) <= 7 && X86_IS_CALLEE ((r)))
1139
/*
 * Per-register bookkeeping for the local register allocator. The fields
 * hold instruction indices; print_regtrack treats born_in == 0 as
 * "register not seen".
 */
typedef struct {
        int born_in, killed_in;
        int last_use, prev_use;
} RegTrack;
1146
1147 static const char*const * ins_spec = pentium_desc;
1148
1149 static void
1150 print_ins (int i, MonoInst *ins)
1151 {
1152         const char *spec = ins_spec [ins->opcode];
1153         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1154         if (spec [MONO_INST_DEST]) {
1155                 if (ins->dreg >= MONO_MAX_IREGS)
1156                         g_print (" R%d <-", ins->dreg);
1157                 else
1158                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1159         }
1160         if (spec [MONO_INST_SRC1]) {
1161                 if (ins->sreg1 >= MONO_MAX_IREGS)
1162                         g_print (" R%d", ins->sreg1);
1163                 else
1164                         g_print (" %s", mono_arch_regname (ins->sreg1));
1165         }
1166         if (spec [MONO_INST_SRC2]) {
1167                 if (ins->sreg2 >= MONO_MAX_IREGS)
1168                         g_print (" R%d", ins->sreg2);
1169                 else
1170                         g_print (" %s", mono_arch_regname (ins->sreg2));
1171         }
1172         if (spec [MONO_INST_CLOB])
1173                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1174         g_print ("\n");
1175 }
1176
1177 static void
1178 print_regtrack (RegTrack *t, int num)
1179 {
1180         int i;
1181         char buf [32];
1182         const char *r;
1183         
1184         for (i = 0; i < num; ++i) {
1185                 if (!t [i].born_in)
1186                         continue;
1187                 if (i >= MONO_MAX_IREGS) {
1188                         g_snprintf (buf, sizeof(buf), "R%d", i);
1189                         r = buf;
1190                 } else
1191                         r = mono_arch_regname (i);
1192                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1193         }
1194 }
1195
1196 typedef struct InstList InstList;
1197
1198 struct InstList {
1199         InstList *prev;
1200         InstList *next;
1201         MonoInst *data;
1202 };
1203
1204 static inline InstList*
1205 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1206 {
1207         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1208         item->data = data;
1209         item->prev = NULL;
1210         item->next = list;
1211         if (list)
1212                 list->prev = item;
1213         return item;
1214 }
1215
1216 /*
1217  * Force the spilling of the variable in the symbolic register 'reg'.
1218  */
1219 static int
1220 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1221 {
1222         MonoInst *load;
1223         int i, sel, spill;
1224         
1225         sel = cfg->rs->iassign [reg];
1226         /*i = cfg->rs->isymbolic [sel];
1227         g_assert (i == reg);*/
1228         i = reg;
1229         spill = ++cfg->spill_count;
1230         cfg->rs->iassign [i] = -spill - 1;
1231         mono_regstate_free_int (cfg->rs, sel);
1232         /* we need to create a spill var and insert a load to sel after the current instruction */
1233         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1234         load->dreg = sel;
1235         load->inst_basereg = X86_EBP;
1236         load->inst_offset = mono_spillvar_offset (cfg, spill);
1237         if (item->prev) {
1238                 while (ins->next != item->prev->data)
1239                         ins = ins->next;
1240         }
1241         load->next = ins->next;
1242         ins->next = load;
1243         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1244         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1245         g_assert (i == sel);
1246
1247         return sel;
1248 }
1249
1250 static int
1251 get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
1252 {
1253         MonoInst *load;
1254         int i, sel, spill;
1255
1256         DEBUG (g_print ("start regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
1257         /* exclude the registers in the current instruction */
1258         if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
1259                 if (ins->sreg1 >= MONO_MAX_IREGS)
1260                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
1261                 else
1262                         regmask &= ~ (1 << ins->sreg1);
1263                 DEBUG (g_print ("excluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
1264         }
1265         if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
1266                 if (ins->sreg2 >= MONO_MAX_IREGS)
1267                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
1268                 else
1269                         regmask &= ~ (1 << ins->sreg2);
1270                 DEBUG (g_print ("excluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
1271         }
1272         if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
1273                 regmask &= ~ (1 << ins->dreg);
1274                 DEBUG (g_print ("excluding dreg %s\n", mono_arch_regname (ins->dreg)));
1275         }
1276
1277         DEBUG (g_print ("available regmask: 0x%08x\n", regmask));
1278         g_assert (regmask); /* need at least a register we can free */
1279         sel = -1;
1280         /* we should track prev_use and spill the register that's farther */
1281         for (i = 0; i < MONO_MAX_IREGS; ++i) {
1282                 if (regmask & (1 << i)) {
1283                         sel = i;
1284                         DEBUG (g_print ("selected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
1285                         break;
1286                 }
1287         }
1288         i = cfg->rs->isymbolic [sel];
1289         spill = ++cfg->spill_count;
1290         cfg->rs->iassign [i] = -spill - 1;
1291         mono_regstate_free_int (cfg->rs, sel);
1292         /* we need to create a spill var and insert a load to sel after the current instruction */
1293         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1294         load->dreg = sel;
1295         load->inst_basereg = X86_EBP;
1296         load->inst_offset = mono_spillvar_offset (cfg, spill);
1297         if (item->prev) {
1298                 while (ins->next != item->prev->data)
1299                         ins = ins->next;
1300         }
1301         load->next = ins->next;
1302         ins->next = load;
1303         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1304         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1305         g_assert (i == sel);
1306         
1307         return sel;
1308 }
1309
1310 static MonoInst*
1311 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1312 {
1313         MonoInst *copy;
1314         MONO_INST_NEW (cfg, copy, OP_MOVE);
1315         copy->dreg = dest;
1316         copy->sreg1 = src;
1317         if (ins) {
1318                 copy->next = ins->next;
1319                 ins->next = copy;
1320         }
1321         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1322         return copy;
1323 }
1324
1325 static MonoInst*
1326 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1327 {
1328         MonoInst *store;
1329         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1330         store->sreg1 = reg;
1331         store->inst_destbasereg = X86_EBP;
1332         store->inst_offset = mono_spillvar_offset (cfg, spill);
1333         if (ins) {
1334                 store->next = ins->next;
1335                 ins->next = store;
1336         }
1337         DEBUG (g_print ("SPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1338         return store;
1339 }
1340
1341 static void
1342 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1343 {
1344         MonoInst *prev;
1345         if (item->next) {
1346                 prev = item->next->data;
1347
1348                 while (prev->next != ins)
1349                         prev = prev->next;
1350                 to_insert->next = ins;
1351                 prev->next = to_insert;
1352         } else {
1353                 to_insert->next = ins;
1354         }
1355         /* 
1356          * needed otherwise in the next instruction we can add an ins to the 
1357          * end and that would get past this instruction.
1358          */
1359         item->data = to_insert; 
1360 }
1361
#if  0
/*
 * alloc_int_reg (currently compiled out):
 * Returns the hard register assigned to sym_reg, allocating one from
 * allow_mask (spilling another register if none is free) when sym_reg has
 * no hard register yet. An assignment below -1 encodes a spill slot as
 * (-slot - 1); in that case a store to the slot is inserted after ins.
 */
static int
alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
{
        MonoRegState *rs = cfg->rs;
        int hreg = rs->iassign [sym_reg];

        if (hreg < 0) {
                /* the register gets spilled after this inst */
                int slot = (hreg < -1) ? -hreg - 1 : 0;

                hreg = mono_regstate_alloc_int (rs, allow_mask);
                if (hreg < 0)
                        hreg = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
                rs->iassign [sym_reg] = hreg;
                /* add option to store before the instruction for src registers */
                if (slot)
                        create_spilled_store (cfg, slot, hreg, sym_reg, ins);
        }
        rs->isymbolic [hreg] = sym_reg;
        return hreg;
}
#endif
1385
1386 /*#include "cprop.c"*/
1387
1388 /*
1389  * Local register allocation.
1390  * We first scan the list of instructions and we save the liveness info of
1391  * each register (when the register is first used, when it's value is set etc.).
1392  * We also reverse the list of instructions (in the InstList list) because assigning
1393  * registers backwards allows for more tricks to be used.
1394  */
1395 void
1396 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1397 {
1398         MonoInst *ins;
1399         MonoRegState *rs = cfg->rs;
1400         int i, val, fpcount;
1401         RegTrack *reginfo, *reginfof;
1402         RegTrack *reginfo1, *reginfo2, *reginfod;
1403         InstList *tmp, *reversed = NULL;
1404         const char *spec;
1405         guint32 src1_mask, src2_mask, dest_mask;
1406
1407         if (!bb->code)
1408                 return;
1409         rs->next_vireg = bb->max_ireg;
1410         rs->next_vfreg = bb->max_freg;
1411         mono_regstate_assign (rs);
1412         reginfo = mono_mempool_alloc0 (cfg->mempool, sizeof (RegTrack) * rs->next_vireg);
1413         reginfof = mono_mempool_alloc0 (cfg->mempool, sizeof (RegTrack) * rs->next_vfreg);
1414         rs->ifree_mask = X86_CALLEE_REGS;
1415
1416         ins = bb->code;
1417
1418         /*if (cfg->opt & MONO_OPT_COPYPROP)
1419                 local_copy_prop (cfg, ins);*/
1420         
1421         i = 1;
1422         fpcount = 0; /* FIXME: track fp stack utilization */
1423         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1424         /* forward pass on the instructions to collect register liveness info */
1425         while (ins) {
1426                 spec = ins_spec [ins->opcode];
1427                 DEBUG (print_ins (i, ins));
1428                 if (spec [MONO_INST_SRC1]) {
1429                         if (spec [MONO_INST_SRC1] == 'f')
1430                                 reginfo1 = reginfof;
1431                         else
1432                                 reginfo1 = reginfo;
1433                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1434                         reginfo1 [ins->sreg1].last_use = i;
1435                 } else {
1436                         ins->sreg1 = -1;
1437                 }
1438                 if (spec [MONO_INST_SRC2]) {
1439                         if (spec [MONO_INST_SRC2] == 'f')
1440                                 reginfo2 = reginfof;
1441                         else
1442                                 reginfo2 = reginfo;
1443                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1444                         reginfo2 [ins->sreg2].last_use = i;
1445                 } else {
1446                         ins->sreg2 = -1;
1447                 }
1448                 if (spec [MONO_INST_DEST]) {
1449                         if (spec [MONO_INST_DEST] == 'f')
1450                                 reginfod = reginfof;
1451                         else
1452                                 reginfod = reginfo;
1453                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1454                                 reginfod [ins->dreg].killed_in = i;
1455                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1456                         reginfod [ins->dreg].last_use = i;
1457                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1458                                 reginfod [ins->dreg].born_in = i;
1459                         if (spec [MONO_INST_DEST] == 'l') {
1460                                 /* result in eax:edx, the virtual register is allocated sequentially */
1461                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1462                                 reginfod [ins->dreg + 1].last_use = i;
1463                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1464                                         reginfod [ins->dreg + 1].born_in = i;
1465                         }
1466                 } else {
1467                         ins->dreg = -1;
1468                 }
1469                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
1470                 ++i;
1471                 ins = ins->next;
1472         }
1473
1474         DEBUG (print_regtrack (reginfo, rs->next_vireg));
1475         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
1476         tmp = reversed;
1477         while (tmp) {
1478                 int prev_dreg, prev_sreg1, prev_sreg2;
1479                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
1480                 --i;
1481                 ins = tmp->data;
1482                 spec = ins_spec [ins->opcode];
1483                 DEBUG (g_print ("processing:"));
1484                 DEBUG (print_ins (i, ins));
1485                 if (spec [MONO_INST_CLOB] == 's') {
1486                         if (rs->ifree_mask & (1 << X86_ECX)) {
1487                                 DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
1488                                 rs->iassign [ins->sreg2] = X86_ECX;
1489                                 rs->isymbolic [X86_ECX] = ins->sreg2;
1490                                 ins->sreg2 = X86_ECX;
1491                                 rs->ifree_mask &= ~ (1 << X86_ECX);
1492                         } else {
1493                                 int need_ecx_spill = TRUE;
1494                                 /* 
1495                                  * we first check if src1/dreg is already assigned a register
1496                                  * and then we force a spill of the var assigned to ECX.
1497                                  */
1498                                 /* the destination register can't be ECX */
1499                                 dest_mask &= ~ (1 << X86_ECX);
1500                                 src1_mask &= ~ (1 << X86_ECX);
1501                                 val = rs->iassign [ins->dreg];
1502                                 /* 
1503                                  * the destination register is already assigned to ECX:
1504                                  * we need to allocate another register for it and then
1505                                  * copy from this to ECX.
1506                                  */
1507                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
1508                                         int new_dest = mono_regstate_alloc_int (rs, dest_mask);
1509                                         if (new_dest < 0)
1510                                                 new_dest = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1511                                         g_assert (new_dest >= 0);
1512                                         ins->dreg = new_dest;
1513                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
1514                                         need_ecx_spill = FALSE;
1515                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
1516                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
1517                                         rs->iassign [ins->dreg] = val;
1518                                         rs->isymbolic [val] = prev_dreg;
1519                                         ins->dreg = val;*/
1520                                 }
1521                                 val = rs->iassign [ins->sreg1];
1522                                 if (val == X86_ECX) {
1523                                         g_assert_not_reached ();
1524                                 } else if (val >= 0) {
1525                                         /* 
1526                                          * the first src reg was already assigned to a register,
1527                                          * we need to copy it to the dest register because the 
1528                                          * shift instruction clobbers the first operand.
1529                                          */
1530                                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, val, NULL);
1531                                         insert_before_ins (ins, tmp, copy);
1532                                 }
1533                                 val = rs->iassign [ins->sreg2];
1534                                 if (val >= 0 && val != X86_ECX) {
1535                                         MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
1536                                         DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
1537                                         move->next = ins;
1538                                         g_assert_not_reached ();
1539                                         /* FIXME: where is move connected to the instruction list? */
1540                                         //tmp->prev->data->next = move;
1541                                 }
1542                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
1543                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
1544                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
1545                                         mono_regstate_free_int (rs, X86_ECX);
1546                                 }
1547                                 /* force-set sreg2 */
1548                                 rs->iassign [ins->sreg2] = X86_ECX;
1549                                 rs->isymbolic [X86_ECX] = ins->sreg2;
1550                                 ins->sreg2 = X86_ECX;
1551                                 rs->ifree_mask &= ~ (1 << X86_ECX);
1552                         }
1553                 } else if (spec [MONO_INST_CLOB] == 'd') { /* division */
1554                         int dest_reg = X86_EAX;
1555                         int clob_reg = X86_EDX;
1556                         if (spec [MONO_INST_DEST] == 'd') {
1557                                 dest_reg = X86_EDX; /* reminder */
1558                                 clob_reg = X86_EAX;
1559                         }
1560                         val = rs->iassign [ins->dreg];
1561                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
1562                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
1563                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
1564                                 mono_regstate_free_int (rs, dest_reg);
1565                         }
1566                         if (val < 0) {
1567                                 if (val < -1) {
1568                                         /* the register gets spilled after this inst */
1569                                         int spill = -val -1;
1570                                         dest_mask = 1 << clob_reg;
1571                                         prev_dreg = ins->dreg;
1572                                         val = mono_regstate_alloc_int (rs, dest_mask);
1573                                         if (val < 0)
1574                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1575                                         rs->iassign [ins->dreg] = val;
1576                                         if (spill)
1577                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
1578                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
1579                                         rs->isymbolic [val] = prev_dreg;
1580                                         ins->dreg = val;
1581                                         if (val != dest_reg) { /* force a copy */
1582                                                 create_copy_ins (cfg, val, dest_reg, ins);
1583                                         }
1584                                 } else {
1585                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
1586                                         rs->iassign [ins->dreg] = dest_reg;
1587                                         rs->isymbolic [dest_reg] = ins->dreg;
1588                                         ins->dreg = dest_reg;
1589                                         rs->ifree_mask &= ~ (1 << dest_reg);
1590                                 }
1591                         } else {
1592                                 //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
1593                                 if (val != dest_reg) { /* force a copy */
1594                                         create_copy_ins (cfg, val, dest_reg, ins);
1595                                         if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
1596                                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
1597                                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
1598                                                 mono_regstate_free_int (rs, dest_reg);
1599                                         }
1600                                 }
1601                         }
1602                         src1_mask = 1 << X86_EAX;
1603                         src2_mask = 1 << X86_ECX;
1604                 }
1605                 if (spec [MONO_INST_DEST] == 'l') {
1606                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
1607                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
1608                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
1609                                 mono_regstate_free_int (rs, X86_EAX);
1610                         }
1611                         if (!(rs->ifree_mask & (1 << X86_EDX))) {
1612                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EDX]));
1613                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
1614                                 mono_regstate_free_int (rs, X86_EDX);
1615                         }
1616                 }
1617
1618                 /* update for use with FP regs... */
1619                 if (spec [MONO_INST_DEST] != 'f' && ins->dreg >= MONO_MAX_IREGS) {
1620                         val = rs->iassign [ins->dreg];
1621                         prev_dreg = ins->dreg;
1622                         if (val < 0) {
1623                                 int spill = 0;
1624                                 if (val < -1) {
1625                                         /* the register gets spilled after this inst */
1626                                         spill = -val -1;
1627                                 }
1628                                 val = mono_regstate_alloc_int (rs, dest_mask);
1629                                 if (val < 0)
1630                                         val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1631                                 rs->iassign [ins->dreg] = val;
1632                                 if (spill)
1633                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
1634                         }
1635                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
1636                         rs->isymbolic [val] = prev_dreg;
1637                         ins->dreg = val;
1638                         if (spec [MONO_INST_DEST] == 'l') {
1639                                 int hreg = prev_dreg + 1;
1640                                 val = rs->iassign [hreg];
1641                                 if (val < 0) {
1642                                         int spill = 0;
1643                                         if (val < -1) {
1644                                                 /* the register gets spilled after this inst */
1645                                                 spill = -val -1;
1646                                         }
1647                                         val = mono_regstate_alloc_int (rs, dest_mask);
1648                                         if (val < 0)
1649                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, hreg);
1650                                         rs->iassign [hreg] = val;
1651                                         if (spill)
1652                                                 create_spilled_store (cfg, spill, val, hreg, ins);
1653                                 }
1654                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
1655                                 rs->isymbolic [val] = hreg;
1656                                 /* FIXME:? ins->dreg = val; */
1657                                 if (ins->dreg == X86_EAX) {
1658                                         if (val != X86_EDX)
1659                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1660                                 } else if (ins->dreg == X86_EDX) {
1661                                         if (val == X86_EAX) {
1662                                                 /* swap */
1663                                                 g_assert_not_reached ();
1664                                         } else {
1665                                                 /* two forced copies */
1666                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1667                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1668                                         }
1669                                 } else {
1670                                         if (val == X86_EDX) {
1671                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1672                                         } else {
1673                                                 /* two forced copies */
1674                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1675                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1676                                         }
1677                                 }
1678                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
1679                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
1680                                         mono_regstate_free_int (rs, val);
1681                                 }
1682                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
1683                                 /* this instruction only outputs to EAX, need to copy */
1684                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1685                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
1686                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
1687                         }
1688                 } else {
1689                         prev_dreg = -1;
1690                 }
1691                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
1692                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
1693                         mono_regstate_free_int (rs, ins->dreg);
1694                 }
1695                 /* put src1 in EAX if it needs to be */
1696                 if (spec [MONO_INST_SRC1] == 'a') {
1697                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
1698                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
1699                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
1700                                 mono_regstate_free_int (rs, X86_EAX);
1701                         }
1702                         /* force-set sreg1 */
1703                         rs->iassign [ins->sreg1] = X86_EAX;
1704                         rs->isymbolic [X86_EAX] = ins->sreg1;
1705                         ins->sreg1 = X86_EAX;
1706                         rs->ifree_mask &= ~ (1 << X86_EAX);
1707                 }
1708                 if (spec [MONO_INST_SRC1] != 'f' && ins->sreg1 >= MONO_MAX_IREGS) {
1709                         val = rs->iassign [ins->sreg1];
1710                         prev_sreg1 = ins->sreg1;
1711                         if (val < 0) {
1712                                 int spill = 0;
1713                                 if (val < -1) {
1714                                         /* the register gets spilled after this inst */
1715                                         spill = -val -1;
1716                                 }
1717                                 if (0 && ins->opcode == OP_MOVE) {
1718                                         /* 
1719                                          * small optimization: the dest register is already allocated
1720                                          * but the src one is not: we can simply assign the same register
1721                                          * here and peephole will get rid of the instruction later.
1722                                          * This optimization may interfere with the clobbering handling:
1723                                          * it removes a mov operation that will be added again to handle clobbering.
1724                                          * There are also some other issues that should with make testjit.
1725                                          */
1726                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
1727                                         val = rs->iassign [ins->sreg1] = ins->dreg;
1728                                         //g_assert (val >= 0);
1729                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
1730                                 } else {
1731                                         //g_assert (val == -1); /* source cannot be spilled */
1732                                         val = mono_regstate_alloc_int (rs, src1_mask);
1733                                         if (val < 0)
1734                                                 val = get_register_spilling (cfg, tmp, ins, src1_mask, ins->sreg1);
1735                                         rs->iassign [ins->sreg1] = val;
1736                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
1737                                 }
1738                                 if (spill) {
1739                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
1740                                         insert_before_ins (ins, tmp, store);
1741                                 }
1742                         }
1743                         rs->isymbolic [val] = prev_sreg1;
1744                         ins->sreg1 = val;
1745                 } else {
1746                         prev_sreg1 = -1;
1747                 }
1748                 /* handle clobbering of sreg1 */
1749                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
1750                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
1751                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
1752                         if (ins->sreg2 == -1 || spec [MONO_INST_CLOB] == 's') {
1753                                 /* note: the copy is inserted before the current instruction! */
1754                                 insert_before_ins (ins, tmp, copy);
1755                                 /* we set sreg1 to dest as well */
1756                                 prev_sreg1 = ins->sreg1 = ins->dreg;
1757                         } else {
1758                                 /* inserted after the operation */
1759                                 copy->next = ins->next;
1760                                 ins->next = copy;
1761                         }
1762                 }
1763                 if (spec [MONO_INST_SRC2] != 'f' && ins->sreg2 >= MONO_MAX_IREGS) {
1764                         val = rs->iassign [ins->sreg2];
1765                         prev_sreg2 = ins->sreg2;
1766                         if (val < 0) {
1767                                 int spill = 0;
1768                                 if (val < -1) {
1769                                         /* the register gets spilled after this inst */
1770                                         spill = -val -1;
1771                                 }
1772                                 val = mono_regstate_alloc_int (rs, src2_mask);
1773                                 if (val < 0)
1774                                         val = get_register_spilling (cfg, tmp, ins, src2_mask, ins->sreg2);
1775                                 rs->iassign [ins->sreg2] = val;
1776                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
1777                                 if (spill)
1778                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
1779                         }
1780                         rs->isymbolic [val] = prev_sreg2;
1781                         ins->sreg2 = val;
1782                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
1783                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
1784                         }
1785                 } else {
1786                         prev_sreg2 = -1;
1787                 }
1788
1789                 if (spec [MONO_INST_CLOB] == 'c') {
1790                         int j, s;
1791                         guint32 clob_mask = X86_CALLEE_REGS;
1792                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
1793                                 s = 1 << j;
1794                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
1795                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
1796                                 }
1797                         }
1798                 }
1799                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
1800                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
1801                         mono_regstate_free_int (rs, ins->sreg1);
1802                 }
1803                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
1804                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
1805                         mono_regstate_free_int (rs, ins->sreg2);
1806                 }*/
1807                 
1808                 //DEBUG (print_ins (i, ins));
1809                 /* this may result from a insert_before call */
1810                 if (!tmp->next)
1811                         bb->code = tmp->data;
1812                 tmp = tmp->next;
1813         }
1814 }
1815
/*
 * emit_float_to_int:
 * @cfg: the compilation context (not used by this function)
 * @code: position in the native code buffer where emission starts
 * @dreg: integer register that receives the converted value
 * @size: size in bytes of the integer result; 1, 2 and 8 get special
 *        handling, every other value takes the 32 bit path
 * @is_signed: passed to x86_widen_reg for 1 and 2 byte results
 *
 * Emits code that stores the value on top of the x87 stack as an
 * integer (popping it from the FP stack) and loads it into @dreg.
 * The FPU control word is saved in a 4 byte scratch slot on the stack,
 * a copy with the rounding-control bits forced to 11b (round toward
 * zero) is loaded for the store, and the saved word is reloaded
 * afterwards. @dreg is also used as the scratch register for building
 * the modified control word. ESP is the same after the emitted
 * sequence as before it.
 *
 * For @size == 8 only the low 32 bits end up in @dreg: the high dword
 * is removed from the stack without being read (see the FIXME below).
 * The previous code left that dword on the stack, so the control word
 * was reloaded from the wrong slot and ESP ended up 4 bytes too low.
 * NOTE(review): the fix makes the @size == 8 sequence three bytes
 * longer; confirm the length budget of any opcode that uses it.
 *
 * Returns the updated code pointer.
 */
1816 static unsigned char*
1817 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1818 {
1819         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4); /* scratch slot for the control word */
1820         x86_fnstcw_membase(code, X86_ESP, 0); /* [esp] = current control word */
1821         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1822         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00); /* rounding control = 11b: truncate */
1823         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2); /* [esp+2] = truncating control word */
1824         x86_fldcw_membase (code, X86_ESP, 2);
1825         if (size == 8) {
1826                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1827                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1828                 x86_pop_reg (code, dreg);
1829                 /* FIXME: need the high register (x86_pop_reg (code, dreg_high)) */
1830                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1831                 /* ^ drops the unread high dword so [esp] is the saved control word again */
1832         } else {
1833                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1834                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1835                 x86_pop_reg (code, dreg);
1836         }
1837         x86_fldcw_membase (code, X86_ESP, 0); /* restore the caller's control word */
1838         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1839
1840         if (size == 1)
1841                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1842         else if (size == 2)
1843                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1844         return code;
1845 }
1846
/*
 * mono_emit_stack_alloc:
 * @code: position in the native code buffer where emission starts
 * @tree: the instruction being emitted; tree->sreg1 names the register
 *        that holds the byte count, tree->dreg and tree->flags are
 *        read as well
 *
 * Emits code that lowers ESP by the value held in the sreg1 register.
 * When MONO_INST_INIT is set in tree->flags it also emits a
 * "rep stosl" that zero-fills upwards from the new ESP. EAX, ECX and
 * EDI are pushed before the fill and popped after it, except for any
 * of them that is the dreg or the size register. The size register is
 * shifted right by two in place, so its value is destroyed; the fill
 * covers size / 4 dwords (presumably callers round the size up to a
 * multiple of 4 - TODO confirm). The new ESP is not copied into
 * tree->dreg here.
 *
 * NOTE(review): with PLATFORM_WIN32 the probe loop subtracts 0x1000
 * from the size register on every iteration, so by the time the
 * MONO_INST_INIT code runs the register only holds the remainder below
 * one page. The zero fill then appears to cover just that remainder
 * for requests of one page or more.
 *
 * Returns the updated code pointer.
 */
1847 static unsigned char*
1848 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1849 {
1850         int sreg = tree->sreg1;
1851 #ifdef PLATFORM_WIN32
1852         guint8* br[5];
1853
1854         /*
1855          * Under Windows:
1856          * If the requested stack size is one page (0x1000 bytes) or more,
1857          * perform a stack-touch operation.
1858          */
1859         /*
1860          * Generate stack probe code.
1861          * Under Windows, it is necessary to allocate one page at a time,
1862          * "touching" the stack after each successful sub-allocation. This is
1863          * because of the way stack growth is implemented - there is a
1864          * guard page before the lowest stack page that is currently committed.
1865          * The stack normally grows sequentially, so the OS traps access to the
1866          * guard page and commits more pages when needed.
1867          */
1868         x86_test_reg_imm (code, sreg, ~0xFFF); /* any bit above the low 12 set? */
1869         br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE); /* no: less than one page */
1870
1871         br[2] = code; /* loop */
1872         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1873         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP); /* read [esp] to touch the page */
1874         x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1875         x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1876         br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1877         x86_patch (br[3], br[2]); /* repeat while a full page or more remains */
1878         x86_test_reg_reg (code, sreg, sreg);
1879         br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE); /* nothing left over */
1880         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg); /* allocate the remainder */
1881
1882         br[1] = code; x86_jump8 (code, 0); /* skip the small-request path */
1883
1884         x86_patch (br[0], code); /* small request: one subtraction, no probing */
1885         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1886         x86_patch (br[1], code);
1887         x86_patch (br[4], code);
1888 #else /* PLATFORM_WIN32 */
1889         x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1890 #endif
1891         if (tree->flags & MONO_INST_INIT) {
1892                 int offset = 0; /* bytes pushed below, i.e. distance from ESP to the new area */
1893                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1894                         x86_push_reg (code, X86_EAX);
1895                         offset += 4;
1896                 }
1897                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1898                         x86_push_reg (code, X86_ECX);
1899                         offset += 4;
1900                 }
1901                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1902                         x86_push_reg (code, X86_EDI);
1903                         offset += 4;
1904                 }
1905                 /* set up rep stosl: ECX = dword count, EAX = 0, EDI = start of the new area */
1906                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1907                 if (sreg != X86_ECX)
1908                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1909                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1910                                 
1911                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1912                 x86_cld (code);
1913                 x86_prefix (code, X86_REP_PREFIX);
1914                 x86_stosl (code);
1915                 /* pop in the reverse of the push order, under the same conditions */
1916                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1917                         x86_pop_reg (code, X86_EDI);
1918                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1919                         x86_pop_reg (code, X86_ECX);
1920                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1921                         x86_pop_reg (code, X86_EAX);
1922         }
1923         return code;
1924 }
1925
/*
 * REAL_PRINT_REG:
 * @text: a string literal; " %d %p\n" is appended to it by adjacent
 *        literal concatenation to form the printf format
 * @reg: the register to print
 *
 * Debugging aid. Asserts at JIT time that @reg is not negative, then
 * emits code that saves EAX, EDX and ECX, pushes the contents of @reg,
 * the number of @reg and the format string, calls printf through EAX,
 * removes the three arguments from the stack and restores ECX, EDX
 * and EAX.
 *
 * The expansion uses a variable named "code" from the surrounding
 * scope as the emission pointer. It is a plain sequence of statements
 * ending in a semicolon, not a do/while (0) block, so it must not be
 * used as the body of an unbraced if/else or loop.
 */
1926 #define REAL_PRINT_REG(text,reg) \
1927 mono_assert (reg >= 0); \
1928 x86_push_reg (code, X86_EAX); \
1929 x86_push_reg (code, X86_EDX); \
1930 x86_push_reg (code, X86_ECX); \
1931 x86_push_reg (code, reg); \
1932 x86_push_imm (code, reg); \
1933 x86_push_imm (code, text " %d %p\n"); \
1934 x86_mov_reg_imm (code, X86_EAX, printf); \
1935 x86_call_reg (code, X86_EAX); \
1936 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
1937 x86_pop_reg (code, X86_ECX); \
1938 x86_pop_reg (code, X86_EDX); \
1939 x86_pop_reg (code, X86_EAX);
1940
1941 void
1942 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1943 {
1944         MonoInst *ins;
1945         MonoCallInst *call;
1946         guint offset;
1947         guint8 *code = cfg->native_code + cfg->code_len;
1948         MonoInst *last_ins = NULL;
1949         guint last_offset = 0;
1950         int max_len, cpos;
1951
1952         if (cfg->opt & MONO_OPT_PEEPHOLE)
1953                 peephole_pass (cfg, bb);
1954
1955 #if 0
1956         /* 
1957          * various stratgies to align BBs. Using real loop detection or simply
1958          * aligning every block leads to more consistent benchmark results,
1959          * but usually slows down the code
1960          * we should do the alignment outside this function or we should adjust
1961          * bb->native offset as well or the code is effectively slowed down!
1962          */
1963         /* align all blocks */
1964 //      if ((pad = (cfg->code_len & (align - 1)))) {
1965         /* poor man loop start detection */
1966 //      if (bb->code && bb->in_count && bb->in_bb [0]->cil_code > bb->cil_code && (pad = (cfg->code_len & (align - 1)))) {
1967         /* consider real loop detection and nesting level */
1968 //      if (bb->loop_blocks && bb->nesting < 3 && (pad = (cfg->code_len & (align - 1)))) {
1969         /* consider real loop detection */
1970         if (bb->loop_blocks && (pad = (cfg->code_len & (align - 1)))) {
1971                 pad = align - pad;
1972                 x86_padding (code, pad);
1973                 cfg->code_len += pad;
1974                 bb->native_offset = cfg->code_len;
1975         }
1976 #endif
1977
1978         if (cfg->verbose_level > 2)
1979                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1980
1981         cpos = bb->max_offset;
1982
1983         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1984                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1985                 g_assert (!mono_compile_aot);
1986                 cpos += 6;
1987
1988                 cov->data [bb->dfn].cil_code = bb->cil_code;
1989                 /* this is not thread save, but good enough */
1990                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1991         }
1992
1993         offset = code - cfg->native_code;
1994
1995         ins = bb->code;
1996         while (ins) {
1997                 offset = code - cfg->native_code;
1998
1999                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2000
2001                 if (offset > (cfg->code_size - max_len - 16)) {
2002                         cfg->code_size *= 2;
2003                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2004                         code = cfg->native_code + offset;
2005                         mono_jit_stats.code_reallocs++;
2006                 }
2007
2008                 mono_debug_record_line_number (cfg, ins, offset);
2009
2010                 switch (ins->opcode) {
2011                 case OP_BIGMUL:
2012                         x86_mul_reg (code, ins->sreg2, TRUE);
2013                         break;
2014                 case OP_BIGMUL_UN:
2015                         x86_mul_reg (code, ins->sreg2, FALSE);
2016                         break;
2017                 case OP_X86_SETEQ_MEMBASE:
2018                         x86_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
2019                         break;
2020                 case OP_STOREI1_MEMBASE_IMM:
2021                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2022                         break;
2023                 case OP_STOREI2_MEMBASE_IMM:
2024                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2025                         break;
2026                 case OP_STORE_MEMBASE_IMM:
2027                 case OP_STOREI4_MEMBASE_IMM:
2028                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2029                         break;
2030                 case OP_STOREI1_MEMBASE_REG:
2031                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2032                         break;
2033                 case OP_STOREI2_MEMBASE_REG:
2034                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2035                         break;
2036                 case OP_STORE_MEMBASE_REG:
2037                 case OP_STOREI4_MEMBASE_REG:
2038                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2039                         break;
2040                 case CEE_LDIND_I:
2041                 case CEE_LDIND_I4:
2042                 case CEE_LDIND_U4:
2043                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2044                         break;
2045                 case OP_LOADU4_MEM:
2046                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2047                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2048                         break;
2049                 case OP_LOAD_MEMBASE:
2050                 case OP_LOADI4_MEMBASE:
2051                 case OP_LOADU4_MEMBASE:
2052                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2053                         break;
2054                 case OP_LOADU1_MEMBASE:
2055                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2056                         break;
2057                 case OP_LOADI1_MEMBASE:
2058                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2059                         break;
2060                 case OP_LOADU2_MEMBASE:
2061                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2062                         break;
2063                 case OP_LOADI2_MEMBASE:
2064                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2065                         break;
2066                 case CEE_CONV_I1:
2067                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2068                         break;
2069                 case CEE_CONV_I2:
2070                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2071                         break;
2072                 case CEE_CONV_U1:
2073                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2074                         break;
2075                 case CEE_CONV_U2:
2076                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2077                         break;
2078                 case OP_COMPARE:
2079                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2080                         break;
2081                 case OP_COMPARE_IMM:
2082                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2083                         break;
2084                 case OP_X86_COMPARE_MEMBASE_REG:
2085                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2086                         break;
2087                 case OP_X86_COMPARE_MEMBASE_IMM:
2088                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2089                         break;
2090                 case OP_X86_COMPARE_REG_MEMBASE:
2091                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2092                         break;
2093                 case OP_X86_TEST_NULL:
2094                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2095                         break;
2096                 case OP_X86_ADD_MEMBASE_IMM:
2097                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2098                         break;
2099                 case OP_X86_SUB_MEMBASE_IMM:
2100                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2101                         break;
2102                 case OP_X86_INC_MEMBASE:
2103                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2104                         break;
2105                 case OP_X86_INC_REG:
2106                         x86_inc_reg (code, ins->dreg);
2107                         break;
2108                 case OP_X86_DEC_MEMBASE:
2109                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2110                         break;
2111                 case OP_X86_DEC_REG:
2112                         x86_dec_reg (code, ins->dreg);
2113                         break;
2114                 case CEE_BREAK:
2115                         x86_breakpoint (code);
2116                         break;
2117                 case OP_ADDCC:
2118                 case CEE_ADD:
2119                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2120                         break;
2121                 case OP_ADC:
2122                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2123                         break;
2124                 case OP_ADD_IMM:
2125                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2126                         break;
2127                 case OP_ADC_IMM:
2128                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2129                         break;
2130                 case OP_SUBCC:
2131                 case CEE_SUB:
2132                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2133                         break;
2134                 case OP_SBB:
2135                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2136                         break;
2137                 case OP_SUB_IMM:
2138                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2139                         break;
2140                 case OP_SBB_IMM:
2141                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2142                         break;
2143                 case CEE_AND:
2144                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2145                         break;
2146                 case OP_AND_IMM:
2147                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2148                         break;
2149                 case CEE_DIV:
2150                         x86_cdq (code);
2151                         x86_div_reg (code, ins->sreg2, TRUE);
2152                         break;
2153                 case CEE_DIV_UN:
2154                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2155                         x86_div_reg (code, ins->sreg2, FALSE);
2156                         break;
2157                 case OP_DIV_IMM:
2158                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2159                         x86_cdq (code);
2160                         x86_div_reg (code, ins->sreg2, TRUE);
2161                         break;
2162                 case CEE_REM:
2163                         x86_cdq (code);
2164                         x86_div_reg (code, ins->sreg2, TRUE);
2165                         break;
2166                 case CEE_REM_UN:
2167                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2168                         x86_div_reg (code, ins->sreg2, FALSE);
2169                         break;
2170                 case OP_REM_IMM:
2171                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2172                         x86_cdq (code);
2173                         x86_div_reg (code, ins->sreg2, TRUE);
2174                         break;
2175                 case CEE_OR:
2176                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2177                         break;
2178                 case OP_OR_IMM:
2179                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2180                         break;
2181                 case CEE_XOR:
2182                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2183                         break;
2184                 case OP_XOR_IMM:
2185                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2186                         break;
2187                 case CEE_SHL:
2188                         g_assert (ins->sreg2 == X86_ECX);
2189                         x86_shift_reg (code, X86_SHL, ins->dreg);
2190                         break;
2191                 case CEE_SHR:
2192                         g_assert (ins->sreg2 == X86_ECX);
2193                         x86_shift_reg (code, X86_SAR, ins->dreg);
2194                         break;
2195                 case OP_SHR_IMM:
2196                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2197                         break;
2198                 case OP_SHR_UN_IMM:
2199                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2200                         break;
2201                 case CEE_SHR_UN:
2202                         g_assert (ins->sreg2 == X86_ECX);
2203                         x86_shift_reg (code, X86_SHR, ins->dreg);
2204                         break;
2205                 case OP_SHL_IMM:
2206                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2207                         break;
2208                 case CEE_NOT:
2209                         x86_not_reg (code, ins->sreg1);
2210                         break;
2211                 case CEE_NEG:
2212                         x86_neg_reg (code, ins->sreg1);
2213                         break;
2214                 case OP_SEXT_I1:
2215                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2216                         break;
2217                 case OP_SEXT_I2:
2218                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2219                         break;
2220                 case CEE_MUL:
2221                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2222                         break;
2223                 case OP_MUL_IMM:
2224                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2225                         break;
2226                 case CEE_MUL_OVF:
2227                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2228                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2229                         break;
2230                 case CEE_MUL_OVF_UN: {
2231                         /* the mul operation and the exception check should most likely be split */
2232                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2233                         /*g_assert (ins->sreg2 == X86_EAX);
2234                         g_assert (ins->dreg == X86_EAX);*/
2235                         if (ins->sreg2 == X86_EAX) {
2236                                 non_eax_reg = ins->sreg1;
2237                         } else if (ins->sreg1 == X86_EAX) {
2238                                 non_eax_reg = ins->sreg2;
2239                         } else {
2240                                 /* no need to save since we're going to store to it anyway */
2241                                 if (ins->dreg != X86_EAX) {
2242                                         saved_eax = TRUE;
2243                                         x86_push_reg (code, X86_EAX);
2244                                 }
2245                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2246                                 non_eax_reg = ins->sreg2;
2247                         }
2248                         if (ins->dreg == X86_EDX) {
2249                                 if (!saved_eax) {
2250                                         saved_eax = TRUE;
2251                                         x86_push_reg (code, X86_EAX);
2252                                 }
2253                         } else if (ins->dreg != X86_EAX) {
2254                                 saved_edx = TRUE;
2255                                 x86_push_reg (code, X86_EDX);
2256                         }
2257                         x86_mul_reg (code, non_eax_reg, FALSE);
2258                         /* save before the check since pop and mov don't change the flags */
2259                         if (saved_edx)
2260                                 x86_pop_reg (code, X86_EDX);
2261                         if (saved_eax)
2262                                 x86_pop_reg (code, X86_EAX);
2263                         if (ins->dreg != X86_EAX)
2264                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2265                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2266                         break;
2267                 }
2268                 case OP_ICONST:
2269                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2270                         break;
2271                 case OP_AOTCONST:
2272                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2273                         x86_mov_reg_imm (code, ins->dreg, 0);
2274                         break;
2275                 case CEE_CONV_I4:
2276                 case CEE_CONV_U4:
2277                 case OP_MOVE:
2278                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2279                         break;
2280                 case CEE_JMP: {
2281                         /*
2282                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2283                          * Keep in sync with the code in emit_epilog.
2284                          */
2285                         int pos = 0;
2286
2287                         /* FIXME: no tracing support... */
2288                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2289                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2290                         /* reset offset to make max_len work */
2291                         offset = code - cfg->native_code;
2292
2293                         g_assert (!cfg->method->save_lmf);
2294
2295                         if (cfg->used_int_regs & (1 << X86_EBX))
2296                                 pos -= 4;
2297                         if (cfg->used_int_regs & (1 << X86_EDI))
2298                                 pos -= 4;
2299                         if (cfg->used_int_regs & (1 << X86_ESI))
2300                                 pos -= 4;
2301                         if (pos)
2302                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2303         
2304                         if (cfg->used_int_regs & (1 << X86_ESI))
2305                                 x86_pop_reg (code, X86_ESI);
2306                         if (cfg->used_int_regs & (1 << X86_EDI))
2307                                 x86_pop_reg (code, X86_EDI);
2308                         if (cfg->used_int_regs & (1 << X86_EBX))
2309                                 x86_pop_reg (code, X86_EBX);
2310         
2311                         /* restore ESP/EBP */
2312                         x86_leave (code);
2313                         offset = code - cfg->native_code;
2314                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2315                         x86_jump32 (code, 0);
2316                         break;
2317                 }
2318                 case OP_CHECK_THIS:
2319                         /* ensure ins->sreg1 is not NULL */
2320                         x86_alu_membase_imm (code, X86_CMP, ins->sreg1, 0, 0);
2321                         break;
2322                 case OP_ARGLIST: {
2323                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2324                         x86_push_reg (code, hreg);
2325                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2326                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2327                         x86_pop_reg (code, hreg);
2328                         break;
2329                 }
2330                 case OP_FCALL:
2331                 case OP_LCALL:
2332                 case OP_VCALL:
2333                 case OP_VOIDCALL:
2334                 case CEE_CALL:
2335                         call = (MonoCallInst*)ins;
2336                         if (ins->flags & MONO_INST_HAS_METHOD)
2337                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method);
2338                         else {
2339                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
2340                         }
2341                         x86_call_code (code, 0);
2342                         if (call->stack_usage && (call->signature->call_convention != MONO_CALL_STDCALL))
2343                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2344                         break;
2345                 case OP_FCALL_REG:
2346                 case OP_LCALL_REG:
2347                 case OP_VCALL_REG:
2348                 case OP_VOIDCALL_REG:
2349                 case OP_CALL_REG:
2350                         call = (MonoCallInst*)ins;
2351                         x86_call_reg (code, ins->sreg1);
2352                         if (call->stack_usage && (call->signature->call_convention != MONO_CALL_STDCALL))
2353                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2354                         break;
2355                 case OP_FCALL_MEMBASE:
2356                 case OP_LCALL_MEMBASE:
2357                 case OP_VCALL_MEMBASE:
2358                 case OP_VOIDCALL_MEMBASE:
2359                 case OP_CALL_MEMBASE:
2360                         call = (MonoCallInst*)ins;
2361                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2362                         if (call->stack_usage && (call->signature->call_convention != MONO_CALL_STDCALL))
2363                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2364                         break;
2365                 case OP_OUTARG:
2366                 case OP_X86_PUSH:
2367                         x86_push_reg (code, ins->sreg1);
2368                         break;
2369                 case OP_X86_PUSH_IMM:
2370                         x86_push_imm (code, ins->inst_imm);
2371                         break;
2372                 case OP_X86_PUSH_MEMBASE:
2373                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2374                         break;
2375                 case OP_X86_PUSH_OBJ: 
2376                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2377                         x86_push_reg (code, X86_EDI);
2378                         x86_push_reg (code, X86_ESI);
2379                         x86_push_reg (code, X86_ECX);
2380                         if (ins->inst_offset)
2381                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2382                         else
2383                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2384                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2385                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2386                         x86_cld (code);
2387                         x86_prefix (code, X86_REP_PREFIX);
2388                         x86_movsd (code);
2389                         x86_pop_reg (code, X86_ECX);
2390                         x86_pop_reg (code, X86_ESI);
2391                         x86_pop_reg (code, X86_EDI);
2392                         break;
2393                 case OP_X86_LEA:
2394                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2395                         break;
2396                 case OP_X86_LEA_MEMBASE:
2397                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2398                         break;
2399                 case OP_X86_XCHG:
2400                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2401                         break;
2402                 case OP_LOCALLOC:
2403                         /* keep alignment */
2404                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2405                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2406                         code = mono_emit_stack_alloc (code, ins);
2407                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2408                         break;
2409                 case CEE_RET:
2410                         x86_ret (code);
2411                         break;
2412                 case CEE_THROW: {
2413                         x86_push_reg (code, ins->sreg1);
2414                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2415                                              (gpointer)"mono_arch_throw_exception");
2416                         x86_call_code (code, 0);
2417                         break;
2418                 }
2419                 case OP_CALL_HANDLER: 
2420                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2421                         x86_call_imm (code, 0);
2422                         break;
2423                 case OP_LABEL:
2424                         ins->inst_c0 = code - cfg->native_code;
2425                         break;
2426                 case CEE_BR:
2427                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2428                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2429                         //break;
2430                         if (ins->flags & MONO_INST_BRLABEL) {
2431                                 if (ins->inst_i0->inst_c0) {
2432                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2433                                 } else {
2434                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2435                                         x86_jump32 (code, 0);
2436                                 }
2437                         } else {
2438                                 if (ins->inst_target_bb->native_offset) {
2439                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2440                                 } else {
2441                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2442                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2443                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2444                                                 x86_jump8 (code, 0);
2445                                         else 
2446                                                 x86_jump32 (code, 0);
2447                                 } 
2448                         }
2449                         break;
2450                 case OP_BR_REG:
2451                         x86_jump_reg (code, ins->sreg1);
2452                         break;
2453                 case OP_CEQ:
2454                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2455                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2456                         break;
2457                 case OP_CLT:
2458                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2459                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2460                         break;
2461                 case OP_CLT_UN:
2462                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2463                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2464                         break;
2465                 case OP_CGT:
2466                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2467                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2468                         break;
2469                 case OP_CGT_UN:
2470                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2471                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2472                         break;
2473                 case OP_COND_EXC_EQ:
2474                 case OP_COND_EXC_NE_UN:
2475                 case OP_COND_EXC_LT:
2476                 case OP_COND_EXC_LT_UN:
2477                 case OP_COND_EXC_GT:
2478                 case OP_COND_EXC_GT_UN:
2479                 case OP_COND_EXC_GE:
2480                 case OP_COND_EXC_GE_UN:
2481                 case OP_COND_EXC_LE:
2482                 case OP_COND_EXC_LE_UN:
2483                 case OP_COND_EXC_OV:
2484                 case OP_COND_EXC_NO:
2485                 case OP_COND_EXC_C:
2486                 case OP_COND_EXC_NC:
2487                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
2488                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2489                         break;
2490                 case CEE_BEQ:
2491                 case CEE_BNE_UN:
2492                 case CEE_BLT:
2493                 case CEE_BLT_UN:
2494                 case CEE_BGT:
2495                 case CEE_BGT_UN:
2496                 case CEE_BGE:
2497                 case CEE_BGE_UN:
2498                 case CEE_BLE:
2499                 case CEE_BLE_UN:
2500                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2501                         break;
2502
2503                 /* floating point opcodes */
2504                 case OP_R8CONST: {
2505                         double d = *(double *)ins->inst_p0;
2506
2507                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2508                                 x86_fldz (code);
2509                         } else if (d == 1.0) {
2510                                 x86_fld1 (code);
2511                         } else {
2512                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
2513                                 x86_fld (code, NULL, TRUE);
2514                         }
2515                         break;
2516                 }
2517                 case OP_R4CONST: {
2518                         float f = *(float *)ins->inst_p0;
2519
2520                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2521                                 x86_fldz (code);
2522                         } else if (f == 1.0) {
2523                                 x86_fld1 (code);
2524                         } else {
2525                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
2526                                 x86_fld (code, NULL, FALSE);
2527                         }
2528                         break;
2529                 }
2530                 case OP_STORER8_MEMBASE_REG:
2531                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2532                         break;
2533                 case OP_LOADR8_MEMBASE:
2534                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2535                         break;
2536                 case OP_STORER4_MEMBASE_REG:
2537                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2538                         break;
2539                 case OP_LOADR4_MEMBASE:
2540                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2541                         break;
2542                 case CEE_CONV_R4: /* FIXME: change precision */
2543                 case CEE_CONV_R8:
2544                         x86_push_reg (code, ins->sreg1);
2545                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2546                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2547                         break;
2548                 case OP_X86_FP_LOAD_I8:
2549                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2550                         break;
2551                 case OP_X86_FP_LOAD_I4:
2552                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2553                         break;
2554                 case OP_FCONV_TO_I1:
2555                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2556                         break;
2557                 case OP_FCONV_TO_U1:
2558                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2559                         break;
2560                 case OP_FCONV_TO_I2:
2561                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2562                         break;
2563                 case OP_FCONV_TO_U2:
2564                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2565                         break;
2566                 case OP_FCONV_TO_I4:
2567                 case OP_FCONV_TO_I:
2568                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2569                         break;
2570                 case OP_FCONV_TO_I8:
2571                         /* we defined this instruction to output only to eax:edx */
2572                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2573                         x86_fnstcw_membase(code, X86_ESP, 0);
2574                         x86_mov_reg_membase (code, X86_EAX, X86_ESP, 0, 2);
2575                         x86_alu_reg_imm (code, X86_OR, X86_EAX, 0xc00);
2576                         x86_mov_membase_reg (code, X86_ESP, 2, X86_EAX, 2);
2577                         x86_fldcw_membase (code, X86_ESP, 2);
2578                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2579                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2580                         x86_pop_reg (code, X86_EAX);
2581                         x86_pop_reg (code, X86_EDX);
2582                         x86_fldcw_membase (code, X86_ESP, 0);
2583                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2584                         break;
2585                 case OP_LCONV_TO_R_UN: { 
2586                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2587                         guint8 *br;
2588
2589                         /* load 64bit integer to FP stack */
2590                         x86_push_imm (code, 0);
2591                         x86_push_reg (code, ins->sreg2);
2592                         x86_push_reg (code, ins->sreg1);
2593                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2594                         /* store as 80bit FP value */
2595                         x86_fst80_membase (code, X86_ESP, 0);
2596                         
2597                         /* test if lreg is negative */
2598                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2599                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2600         
2601                         /* add correction constant mn */
2602                         x86_fld80_mem (code, mn);
2603                         x86_fld80_membase (code, X86_ESP, 0);
2604                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2605                         x86_fst80_membase (code, X86_ESP, 0);
2606
2607                         x86_patch (br, code);
2608
2609                         x86_fld80_membase (code, X86_ESP, 0);
2610                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2611
2612                         break;
2613                 }
2614                 case OP_LCONV_TO_OVF_I: {
2615                         guint8 *br [3], *label [1];
2616
2617                         /* 
2618                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2619                          */
2620                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2621
2622                         /* If the low word top bit is set, see if we are negative */
2623                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2624                         /* We are not negative (no top bit set), check for our top word to be zero */
2625                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2626                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2627                         label [0] = code;
2628
2629                         /* throw exception */
2630                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2631                         x86_jump32 (code, 0);
2632         
2633                         x86_patch (br [0], code);
2634                         /* our top bit is set, check that top word is 0xffffffff */
2635                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2636                 
2637                         x86_patch (br [1], code);
2638                         /* nope, emit exception */
2639                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2640                         x86_patch (br [2], label [0]);
2641
2642                         if (ins->dreg != ins->sreg1)
2643                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2644                         break;
2645                 }
2646                 case OP_FADD:
2647                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2648                         break;
2649                 case OP_FSUB:
2650                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2651                         break;          
2652                 case OP_FMUL:
2653                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2654                         break;          
2655                 case OP_FDIV:
2656                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2657                         break;          
2658                 case OP_FNEG:
2659                         x86_fchs (code);
2660                         break;          
2661                 case OP_SIN:
2662                         x86_fsin (code);
2663                         break;          
2664                 case OP_COS:
2665                         x86_fcos (code);
2666                         break;          
2667                 case OP_ABS:
2668                         x86_fabs (code);
2669                         break;          
2670                 case OP_TAN: {
2671                         /* 
2672                          * it really doesn't make sense to inline all this code,
2673                          * it's here just to show that things may not be as simple 
2674                          * as they appear.
2675                          */
2676                         guchar *check_pos, *end_tan, *pop_jump;
2677                         x86_push_reg (code, X86_EAX);
2678                         x86_fptan (code);
2679                         x86_fnstsw (code);
2680                         x86_test_reg_imm (code, X86_EAX, 0x400);
2681                         check_pos = code;
2682                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2683                         x86_fstp (code, 0); /* pop the 1.0 */
2684                         end_tan = code;
2685                         x86_jump8 (code, 0);
2686                         x86_fldpi (code);
2687                         x86_fp_op (code, X86_FADD, 0);
2688                         x86_fxch (code, 1);
2689                         x86_fprem1 (code);
2690                         x86_fstsw (code);
2691                         x86_test_reg_imm (code, X86_EAX, 0x400);
2692                         pop_jump = code;
2693                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2694                         x86_fstp (code, 1);
2695                         x86_fptan (code);
2696                         x86_patch (pop_jump, code);
2697                         x86_fstp (code, 0); /* pop the 1.0 */
2698                         x86_patch (check_pos, code);
2699                         x86_patch (end_tan, code);
2700                         x86_pop_reg (code, X86_EAX);
2701                         break;
2702                 }
2703                 case OP_ATAN:
2704                         x86_fld1 (code);
2705                         x86_fpatan (code);
2706                         break;          
2707                 case OP_SQRT:
2708                         x86_fsqrt (code);
2709                         break;          
2710                 case OP_X86_FPOP:
2711                         x86_fstp (code, 0);
2712                         break;          
2713                 case OP_FREM: {
2714                         guint8 *l1, *l2;
2715
2716                         x86_push_reg (code, X86_EAX);
2717                         /* we need to exchange ST(0) with ST(1) */
2718                         x86_fxch (code, 1);
2719
2720                         /* this requires a loop, because fprem sometimes 
2721                          * returns a partial remainder */
2722                         l1 = code;
2723                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2724                         /* x86_fprem1 (code); */
2725                         x86_fprem (code);
2726                         x86_fnstsw (code);
2727                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x0400);
2728                         l2 = code + 2;
2729                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2730
2731                         /* pop result */
2732                         x86_fstp (code, 1);
2733
2734                         x86_pop_reg (code, X86_EAX);
2735                         break;
2736                 }
2737                 case OP_FCOMPARE:
2738                         if (cfg->opt & MONO_OPT_FCMOV) {
2739                                 x86_fcomip (code, 1);
2740                                 x86_fstp (code, 0);
2741                                 break;
2742                         }
2743                         /* this overwrites EAX */
2744                         EMIT_FPCOMPARE(code);
2745                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2746                         break;
2747                 case OP_FCEQ:
2748                         if (cfg->opt & MONO_OPT_FCMOV) {
2749                                 /* zeroing the register at the start results in 
2750                                  * shorter and faster code (we can also remove the widening op)
2751                                  */
2752                                 guchar *unordered_check;
2753                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2754                                 x86_fcomip (code, 1);
2755                                 x86_fstp (code, 0);
2756                                 unordered_check = code;
2757                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2758                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2759                                 x86_patch (unordered_check, code);
2760                                 break;
2761                         }
2762                         if (ins->dreg != X86_EAX) 
2763                                 x86_push_reg (code, X86_EAX);
2764
2765                         EMIT_FPCOMPARE(code);
2766                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2767                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2768                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2769                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2770
2771                         if (ins->dreg != X86_EAX) 
2772                                 x86_pop_reg (code, X86_EAX);
2773                         break;
2774                 case OP_FCLT:
2775                 case OP_FCLT_UN:
2776                         if (cfg->opt & MONO_OPT_FCMOV) {
2777                                 /* zeroing the register at the start results in 
2778                                  * shorter and faster code (we can also remove the widening op)
2779                                  */
2780                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2781                                 x86_fcomip (code, 1);
2782                                 x86_fstp (code, 0);
2783                                 if (ins->opcode == OP_FCLT_UN) {
2784                                         guchar *unordered_check = code;
2785                                         guchar *jump_to_end;
2786                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2787                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2788                                         jump_to_end = code;
2789                                         x86_jump8 (code, 0);
2790                                         x86_patch (unordered_check, code);
2791                                         x86_inc_reg (code, ins->dreg);
2792                                         x86_patch (jump_to_end, code);
2793                                 } else {
2794                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2795                                 }
2796                                 break;
2797                         }
2798                         if (ins->dreg != X86_EAX) 
2799                                 x86_push_reg (code, X86_EAX);
2800
2801                         EMIT_FPCOMPARE(code);
2802                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2803                         if (ins->opcode == OP_FCLT_UN) {
2804                                 guchar *is_not_zero_check, *end_jump;
2805                                 is_not_zero_check = code;
2806                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2807                                 end_jump = code;
2808                                 x86_jump8 (code, 0);
2809                                 x86_patch (is_not_zero_check, code);
2810                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2811
2812                                 x86_patch (end_jump, code);
2813                         }
2814                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2815                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2816
2817                         if (ins->dreg != X86_EAX) 
2818                                 x86_pop_reg (code, X86_EAX);
2819                         break;
2820                 case OP_FCGT:
2821                 case OP_FCGT_UN:
2822                         if (cfg->opt & MONO_OPT_FCMOV) {
2823                                 /* zeroing the register at the start results in 
2824                                  * shorter and faster code (we can also remove the widening op)
2825                                  */
2826                                 guchar *unordered_check;
2827                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2828                                 x86_fcomip (code, 1);
2829                                 x86_fstp (code, 0);
2830                                 if (ins->opcode == OP_FCGT) {
2831                                         unordered_check = code;
2832                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2833                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2834                                         x86_patch (unordered_check, code);
2835                                 } else {
2836                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2837                                 }
2838                                 break;
2839                         }
2840                         if (ins->dreg != X86_EAX) 
2841                                 x86_push_reg (code, X86_EAX);
2842
2843                         EMIT_FPCOMPARE(code);
2844                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2845                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2846                         if (ins->opcode == OP_FCGT_UN) {
2847                                 guchar *is_not_zero_check, *end_jump;
2848                                 is_not_zero_check = code;
2849                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2850                                 end_jump = code;
2851                                 x86_jump8 (code, 0);
2852                                 x86_patch (is_not_zero_check, code);
2853                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2854
2855                                 x86_patch (end_jump, code);
2856                         }
2857                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2858                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2859
2860                         if (ins->dreg != X86_EAX) 
2861                                 x86_pop_reg (code, X86_EAX);
2862                         break;
2863                 case OP_FBEQ:
2864                         if (cfg->opt & MONO_OPT_FCMOV) {
2865                                 guchar *jump = code;
2866                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2867                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2868                                 x86_patch (jump, code);
2869                                 break;
2870                         }
2871                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2872                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2873                         break;
2874                 case OP_FBNE_UN:
2875                         if (cfg->opt & MONO_OPT_FCMOV) {
2876                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2877                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2878                                 break;
2879                         }
2880                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2881                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2882                         break;
2883                 case OP_FBLT:
2884                         if (cfg->opt & MONO_OPT_FCMOV) {
2885                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2886                                 break;
2887                         }
2888                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2889                         break;
2890                 case OP_FBLT_UN:
2891                         if (cfg->opt & MONO_OPT_FCMOV) {
2892                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2893                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2894                                 break;
2895                         }
2896                         if (ins->opcode == OP_FBLT_UN) {
2897                                 guchar *is_not_zero_check, *end_jump;
2898                                 is_not_zero_check = code;
2899                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2900                                 end_jump = code;
2901                                 x86_jump8 (code, 0);
2902                                 x86_patch (is_not_zero_check, code);
2903                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2904
2905                                 x86_patch (end_jump, code);
2906                         }
2907                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2908                         break;
2909                 case OP_FBGT:
2910                 case OP_FBGT_UN:
2911                         if (cfg->opt & MONO_OPT_FCMOV) {
2912                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2913                                 break;
2914                         }
2915                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2916                         if (ins->opcode == OP_FBGT_UN) {
2917                                 guchar *is_not_zero_check, *end_jump;
2918                                 is_not_zero_check = code;
2919                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2920                                 end_jump = code;
2921                                 x86_jump8 (code, 0);
2922                                 x86_patch (is_not_zero_check, code);
2923                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2924
2925                                 x86_patch (end_jump, code);
2926                         }
2927                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2928                         break;
2929                 case OP_FBGE:
2930                 case OP_FBGE_UN:
2931                         if (cfg->opt & MONO_OPT_FCMOV) {
2932                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
2933                                 break;
2934                         }
2935                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2936                         break;
2937                 case OP_FBLE:
2938                 case OP_FBLE_UN:
2939                         if (cfg->opt & MONO_OPT_FCMOV) {
2940                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2941                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
2942                                 break;
2943                         }
2944                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2945                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2946                         break;
2947                 case CEE_CKFINITE: {
2948                         x86_push_reg (code, X86_EAX);
2949                         x86_fxam (code);
2950                         x86_fnstsw (code);
2951                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
2952                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2953                         x86_pop_reg (code, X86_EAX);
2954                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
2955                         break;
2956                 }
2957                 default:
2958                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
2959                         g_assert_not_reached ();
2960                 }
2961
2962                 if ((code - cfg->native_code - offset) > max_len) {
2963                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
2964                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
2965                         g_assert_not_reached ();
2966                 }
2967                
2968                 cpos += max_len;
2969
2970                 last_ins = ins;
2971                 last_offset = offset;
2972                 
2973                 ins = ins->next;
2974         }
2975
2976         cfg->code_len = code - cfg->native_code;
2977 }
2978
2979 void
2980 mono_arch_register_lowlevel_calls (void)
2981 {
2982         mono_register_jit_icall (enter_method, "mono_enter_method", NULL, TRUE);
2983         mono_register_jit_icall (leave_method, "mono_leave_method", NULL, TRUE);
2984 }
2985
2986 void
2987 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji)
2988 {
2989         MonoJumpInfo *patch_info;
2990
2991         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
2992                 unsigned char *ip = patch_info->ip.i + code;
2993                 const unsigned char *target = NULL;
2994
2995                 switch (patch_info->type) {
2996                 case MONO_PATCH_INFO_BB:
2997                         target = patch_info->data.bb->native_offset + code;
2998                         break;
2999                 case MONO_PATCH_INFO_ABS:
3000                         target = patch_info->data.target;
3001                         break;
3002                 case MONO_PATCH_INFO_LABEL:
3003                         target = patch_info->data.inst->inst_c0 + code;
3004                         break;
3005                 case MONO_PATCH_INFO_IP:
3006                         *((gpointer *)(ip)) = ip;
3007                         continue;
3008                 case MONO_PATCH_INFO_METHOD_REL:
3009                         *((gpointer *)(ip)) = code + patch_info->data.offset;
3010                         continue;
3011                 case MONO_PATCH_INFO_INTERNAL_METHOD: {
3012                         MonoJitICallInfo *mi = mono_find_jit_icall_by_name (patch_info->data.name);
3013                         if (!mi) {
3014                                 g_warning ("unknown MONO_PATCH_INFO_INTERNAL_METHOD %s", patch_info->data.name);
3015                                 g_assert_not_reached ();
3016                         }
3017                         target = mono_icall_get_wrapper (mi);
3018                         break;
3019                 }
3020                 case MONO_PATCH_INFO_METHOD_JUMP: {
3021                         GSList *list;
3022
3023                         /* get the trampoline to the method from the domain */
3024                         target = mono_arch_create_jump_trampoline (patch_info->data.method);
3025                         if (!domain->jump_target_hash)
3026                                 domain->jump_target_hash = g_hash_table_new (NULL, NULL);
3027                         list = g_hash_table_lookup (domain->jump_target_hash, patch_info->data.method);
3028                         list = g_slist_prepend (list, ip);
3029                         g_hash_table_insert (domain->jump_target_hash, patch_info->data.method, list);
3030                         break;
3031                 }
3032                 case MONO_PATCH_INFO_METHOD:
3033                         if (patch_info->data.method == method) {
3034                                 target = code;
3035                         } else
3036                                 /* get the trampoline to the method from the domain */
3037                                 target = mono_arch_create_jit_trampoline (patch_info->data.method);
3038                         break;
3039                 case MONO_PATCH_INFO_SWITCH: {
3040                         gpointer *jump_table = mono_mempool_alloc (domain->code_mp, sizeof (gpointer) * patch_info->table_size);
3041                         int i;
3042
3043                         *((gconstpointer *)(ip + 2)) = jump_table;
3044
3045                         for (i = 0; i < patch_info->table_size; i++) {
3046                                 jump_table [i] = code + (int)patch_info->data.table [i];
3047                         }
3048                         /* we put into the table the absolute address, no need for x86_patch in this case */
3049                         continue;
3050                 }
3051                 case MONO_PATCH_INFO_METHODCONST:
3052                 case MONO_PATCH_INFO_CLASS:
3053                 case MONO_PATCH_INFO_IMAGE:
3054                 case MONO_PATCH_INFO_FIELD:
3055                         *((gconstpointer *)(ip + 1)) = patch_info->data.target;
3056                         continue;
3057                 case MONO_PATCH_INFO_IID:
3058                         mono_class_init (patch_info->data.klass);
3059                         *((guint32 *)(ip + 1)) = patch_info->data.klass->interface_id;
3060                         continue;                       
3061                 case MONO_PATCH_INFO_VTABLE:
3062                         *((gconstpointer *)(ip + 1)) = mono_class_vtable (domain, patch_info->data.klass);
3063                         continue;
3064                 case MONO_PATCH_INFO_CLASS_INIT: {
3065                         guint8 *code = ip;
3066                         /* Might already been changed to a nop */
3067                         x86_call_imm (code, 0);
3068                         target = mono_create_class_init_trampoline (mono_class_vtable (domain, patch_info->data.klass));
3069                         break;
3070                 }
3071                 case MONO_PATCH_INFO_SFLDA: {
3072                         MonoVTable *vtable = mono_class_vtable (domain, patch_info->data.field->parent);
3073                         if (!vtable->initialized && !(vtable->klass->flags & TYPE_ATTRIBUTE_BEFORE_FIELD_INIT) && mono_class_needs_cctor_run (vtable->klass, method))
3074                                 /* Done by the generated code */
3075                                 ;
3076                         else {
3077                                 mono_runtime_class_init (vtable);
3078                         }
3079                         *((gconstpointer *)(ip + 1)) = 
3080                                 (char*)vtable->data + patch_info->data.field->offset;
3081                         continue;
3082                 }
3083                 case MONO_PATCH_INFO_R4:
3084                 case MONO_PATCH_INFO_R8:
3085                         *((gconstpointer *)(ip + 2)) = patch_info->data.target;
3086                         continue;
3087                 case MONO_PATCH_INFO_EXC_NAME:
3088                         *((gconstpointer *)(ip + 1)) = patch_info->data.name;
3089                         continue;
3090                 case MONO_PATCH_INFO_LDSTR:
3091                         *((gconstpointer *)(ip + 1)) = 
3092                                 mono_ldstr (domain, patch_info->data.token->image, 
3093                                                         mono_metadata_token_index (patch_info->data.token->token));
3094                         continue;
3095                 case MONO_PATCH_INFO_TYPE_FROM_HANDLE: {
3096                         gpointer handle;
3097                         MonoClass *handle_class;
3098
3099                         handle = mono_ldtoken (patch_info->data.token->image, 
3100                                                                    patch_info->data.token->token, &handle_class);
3101                         mono_class_init (handle_class);
3102                         mono_class_init (mono_class_from_mono_type (handle));
3103
3104                         *((gconstpointer *)(ip + 1)) = 
3105                                 mono_type_get_object (domain, handle);
3106                         continue;
3107                 }
3108                 case MONO_PATCH_INFO_LDTOKEN: {
3109                         gpointer handle;
3110                         MonoClass *handle_class;
3111
3112                         handle = mono_ldtoken (patch_info->data.token->image,
3113                                                                    patch_info->data.token->token, &handle_class);
3114                         mono_class_init (handle_class);
3115
3116                         *((gconstpointer *)(ip + 1)) = handle;
3117                         continue;
3118                 }
3119                 default:
3120                         g_assert_not_reached ();
3121                 }
3122                 x86_patch (ip, target);
3123         }
3124 }
3125
3126 int
3127 mono_arch_max_epilog_size (MonoCompile *cfg)
3128 {
3129         int exc_count = 0, max_epilog_size = 16;
3130         MonoJumpInfo *patch_info;
3131         
3132         if (cfg->method->save_lmf)
3133                 max_epilog_size += 128;
3134         
3135         if (mono_jit_trace_calls != NULL)
3136                 max_epilog_size += 50;
3137
3138         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3139                 max_epilog_size += 50;
3140
3141         /* count the number of exception infos */
3142      
3143         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3144                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3145                         exc_count++;
3146         }
3147
3148         /* 
3149          * make sure we have enough space for exceptions
3150          * 16 is the size of two push_imm instructions and a call
3151          */
3152         max_epilog_size += exc_count*16;
3153
3154         return max_epilog_size;
3155 }
3156
3157 guint8 *
3158 mono_arch_emit_prolog (MonoCompile *cfg)
3159 {
3160         MonoMethod *method = cfg->method;
3161         MonoBasicBlock *bb;
3162         MonoMethodSignature *sig;
3163         MonoInst *inst;
3164         int alloc_size, pos, max_offset, i;
3165         guint8 *code;
3166
3167         cfg->code_size =  MAX (((MonoMethodNormal *)method)->header->code_size * 4, 256);
3168         code = cfg->native_code = g_malloc (cfg->code_size);
3169
3170         x86_push_reg (code, X86_EBP);
3171         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3172
3173         alloc_size = - cfg->stack_offset;
3174         pos = 0;
3175
3176         if (method->save_lmf) {
3177                 pos += sizeof (MonoLMF);
3178
3179                 /* save the current IP */
3180                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3181                 x86_push_imm (code, 0);
3182
3183                 /* save all caller saved regs */
3184                 x86_push_reg (code, X86_EBX);
3185                 x86_push_reg (code, X86_EDI);
3186                 x86_push_reg (code, X86_ESI);
3187                 x86_push_reg (code, X86_EBP);
3188
3189                 /* save method info */
3190                 x86_push_imm (code, method);
3191
3192                 /* get the address of lmf for the current thread */
3193                 /* 
3194                  * This is performance critical so we try to use some tricks to make
3195                  * it fast.
3196                  */
3197                 if (lmf_tls_offset != -1) {
3198                         /* Load lmf quicky using the GS register */
3199                         x86_prefix (code, X86_GS_PREFIX);
3200                         x86_mov_reg_mem (code, X86_EAX, 0, 4);
3201                         x86_mov_reg_membase (code, X86_EAX, X86_EAX, lmf_tls_offset, 4);
3202                 }
3203                 else {
3204                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3205                                                                  (gpointer)"mono_get_lmf_addr");
3206                         x86_call_code (code, 0);
3207                 }
3208
3209                 /* push lmf */
3210                 x86_push_reg (code, X86_EAX); 
3211                 /* push *lfm (previous_lmf) */
3212                 x86_push_membase (code, X86_EAX, 0);
3213                 /* *(lmf) = ESP */
3214                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3215         } else {
3216
3217                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3218                         x86_push_reg (code, X86_EBX);
3219                         pos += 4;
3220                 }
3221
3222                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3223                         x86_push_reg (code, X86_EDI);
3224                         pos += 4;
3225                 }
3226
3227                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3228                         x86_push_reg (code, X86_ESI);
3229                         pos += 4;
3230                 }
3231         }
3232
3233         alloc_size -= pos;
3234
3235         if (alloc_size)
3236                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3237
3238         /* compute max_offset in order to use short forward jumps */
3239         max_offset = 0;
3240         if (cfg->opt & MONO_OPT_BRANCH) {
3241                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3242                         MonoInst *ins = bb->code;
3243                         bb->max_offset = max_offset;
3244
3245                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3246                                 max_offset += 6; 
3247
3248                         while (ins) {
3249                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3250                                 ins = ins->next;
3251                         }
3252                 }
3253         }
3254
3255         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3256                 code = mono_arch_instrument_prolog (cfg, enter_method, code, TRUE);
3257
3258         /* load arguments allocated to register from the stack */
3259         sig = method->signature;
3260         pos = 0;
3261
3262         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3263                 inst = cfg->varinfo [pos];
3264                 if (inst->opcode == OP_REGVAR) {
3265                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3266                         if (cfg->verbose_level > 2)
3267                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3268                 }
3269                 pos++;
3270         }
3271
3272         cfg->code_len = code - cfg->native_code;
3273
3274         return code;
3275 }
3276
3277 void
3278 mono_arch_emit_epilog (MonoCompile *cfg)
3279 {
3280         MonoJumpInfo *patch_info;
3281         MonoMethod *method = cfg->method;
3282         MonoMethodSignature *sig = method->signature;
3283         int pos;
3284         guint32 stack_to_pop;
3285         guint8 *code;
3286
3287         code = cfg->native_code + cfg->code_len;
3288
3289         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3290                 code = mono_arch_instrument_epilog (cfg, leave_method, code, TRUE);
3291
3292         /* the code restoring the registers must be kept in sync with CEE_JMP */
3293         pos = 0;
3294         
3295         if (method->save_lmf) {
3296                 pos = -sizeof (MonoLMF);
3297         } else {
3298                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3299                         pos -= 4;
3300                 }
3301                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3302                         pos -= 4;
3303                 }
3304                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3305                         pos -= 4;
3306                 }
3307         }
3308
3309         if (pos)
3310                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3311         
3312         if (method->save_lmf) {
3313                 /* ebx = previous_lmf */
3314                 x86_pop_reg (code, X86_EBX);
3315                 /* edi = lmf */
3316                 x86_pop_reg (code, X86_EDI);
3317                 /* *(lmf) = previous_lmf */
3318                 x86_mov_membase_reg (code, X86_EDI, 0, X86_EBX, 4);
3319
3320                 /* discard method info */
3321                 x86_pop_reg (code, X86_ESI);
3322
3323                 /* restore caller saved regs */
3324                 x86_pop_reg (code, X86_EBP);
3325                 x86_pop_reg (code, X86_ESI);
3326                 x86_pop_reg (code, X86_EDI);
3327                 x86_pop_reg (code, X86_EBX);
3328
3329         } else {
3330
3331                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3332                         x86_pop_reg (code, X86_ESI);
3333                 }
3334                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3335                         x86_pop_reg (code, X86_EDI);
3336                 }
3337                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3338                         x86_pop_reg (code, X86_EBX);
3339                 }
3340         }
3341
3342         x86_leave (code);
3343
3344         if (sig->call_convention == MONO_CALL_STDCALL) {
3345           MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3346
3347           stack_to_pop = arch_get_argument_info (sig, sig->param_count, arg_info);
3348         }
3349         else
3350         if (MONO_TYPE_ISSTRUCT (cfg->method->signature->ret))
3351           stack_to_pop = 4;
3352         else
3353           stack_to_pop = 0;
3354
3355         if (stack_to_pop)
3356                 x86_ret_imm (code, stack_to_pop);
3357         else
3358                 x86_ret (code);
3359
3360         /* add code to raise exceptions */
3361         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3362                 switch (patch_info->type) {
3363                 case MONO_PATCH_INFO_EXC:
3364                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3365                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);
3366                         x86_push_imm (code, patch_info->data.target);
3367                         mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_METHOD_REL, (gpointer)patch_info->ip.i);
3368                         x86_push_imm (code, patch_info->ip.i + cfg->native_code);
3369                         patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
3370                         patch_info->data.name = "mono_arch_throw_exception_by_name";
3371                         patch_info->ip.i = code - cfg->native_code;
3372                         x86_jump_code (code, 0);
3373                         break;
3374                 default:
3375                         /* do nothing */
3376                         break;
3377                 }
3378         }
3379
3380         cfg->code_len = code - cfg->native_code;
3381
3382         g_assert (cfg->code_len < cfg->code_size);
3383
3384 }
3385
3386 void
3387 mono_arch_flush_icache (guint8 *code, gint size)
3388 {
3389         /* not needed */
3390 }
3391
3392 /*
3393  * Support for fast access to the thread-local lmf structure using the GS
3394  * segment register on NPTL + kernel 2.6.x.
3395  */
3396
/*
 * Set to TRUE by mono_arch_setup_jit_tls_data () on its first call so that
 * the probing for lmf_tls_offset is attempted only once.
 */
3397 static gboolean tls_offset_inited = FALSE;
3398
/*
 * Thread-local pointer, only available when the compiler supports the
 * __thread keyword. mono_arch_setup_jit_tls_data () stores the address of
 * the lmf field of the MonoJitTlsData it is given here, and
 * mono_arch_get_lmf_addr () returns it.
 */
3399 #ifdef HAVE_KW_THREAD
3400 static __thread gpointer mono_lmf_addr;
3401 #endif
3402
/*
 * mono_arch_get_lmf_addr:
 *
 * Returns the value of the thread-local mono_lmf_addr when HAVE_KW_THREAD is
 * defined. Without HAVE_KW_THREAD it must never be called: it asserts.
 *
 * NOTE: mono_arch_setup_jit_tls_data () takes the address of this function
 * and compares its first machine code bytes against a fixed pattern in order
 * to extract the TLS offset of mono_lmf_addr. Any change to the body may
 * change the generated code and make that pattern stop matching.
 */
3403 static gpointer
3404 mono_arch_get_lmf_addr (void)
3405 {
3406 #ifdef HAVE_KW_THREAD
3407         return mono_lmf_addr;
3408 #else
3409         g_assert_not_reached ();
3410         return NULL;
3411 #endif
3412 }
3413
3414 void
3415 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
3416 {
3417         if (!tls_offset_inited) {
3418                 guint8 *code;
3419
3420                 tls_offset_inited = TRUE;
3421
3422                 if (getenv ("MONO_NPTL")) {
3423                         /* 
3424                          * Determine the offset of mono_lfm_addr inside the TLS structures
3425                          * by disassembling the function above.
3426                          */
3427                         code = (guint8*)&mono_arch_get_lmf_addr;
3428
3429                         /* This is generated by gcc 3.3.2 */
3430                         if ((code [0] == 0x55) && (code [1] == 0x89) && (code [2] == 0xe5) &&
3431                                 (code [3] == 0x65) && (code [4] == 0xa1) && (code [5] == 0x00) &&
3432                                 (code [6] == 0x00) && (code [7] == 0x00) && (code [8] == 0x00) &&
3433                                 (code [9] == 0x8b) && (code [10] == 0x80)) {
3434                                 lmf_tls_offset = *(int*)&(code [11]);
3435                         }
3436                 }
3437         }               
3438
3439 #ifdef HAVE_KW_THREAD
3440         mono_lmf_addr = &tls->lmf;
3441 #endif
3442 }