1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *
8  * (C) 2003 Ximian, Inc.
9  */
10 #include "mini.h"
11 #include <string.h>
12
13 #include <mono/metadata/appdomain.h>
14 #include <mono/metadata/debug-helpers.h>
15
16 #include "mini-x86.h"
17 #include "inssel.h"
18 #include "regset.h"
19 #include "cpu-pentium.h"
20
21 int mono_exc_esp_offset = 0;
22
23 const char*
24 mono_arch_regname (int reg) {
25         switch (reg) {
26         case X86_EAX: return "%eax";
27         case X86_EBX: return "%ebx";
28         case X86_ECX: return "%ecx";
29         case X86_EDX: return "%edx";
30         case X86_ESP: return "%esp";
31         case X86_EBP: return "%ebp";
32         case X86_EDI: return "%edi";
33         case X86_ESI: return "%esi";
34         }
35         return "unknown";
36 }
37
38 typedef struct {
39         guint16 size;
40         guint16 offset;
41         guint8  pad;
42 } MonoJitArgumentInfo;
43
44 /*
45  * arch_get_argument_info:
46  * @csig:  a method signature
47  * @param_count: the number of parameters to consider
48  * @arg_info: an array in which to store the resulting info
49  *
50  * Gathers information on parameters such as size, alignment and
51  * padding. arg_info should be large enough to hold param_count + 1 entries.
52  *
53  * Returns the size of the activation frame.
54  */
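/*
 * A worked example (assuming MONO_ARCH_FRAME_ALIGNMENT is 4 and there is no
 * struct return): for an instance method taking (int32, int64), offset starts
 * at 8 because 0(%ebp) holds the saved EBP and 4(%ebp) the return address, so
 * arg_info [0].offset is 8 (the 'this' pointer), arg_info [1].offset is 12
 * and arg_info [2].offset is 16; the returned frame size is 4 + 4 + 8 = 16.
 */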
55 static int
56 arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
57 {
58         int k, frame_size = 0;
59         int size, align, pad;
60         int offset = 8;
61
62         if (MONO_TYPE_ISSTRUCT (csig->ret)) { 
63                 frame_size += sizeof (gpointer);
64                 offset += 4;
65         }
66
67         arg_info [0].offset = offset;
68
69         if (csig->hasthis) {
70                 frame_size += sizeof (gpointer);
71                 offset += 4;
72         }
73
74         arg_info [0].size = frame_size;
75
76         for (k = 0; k < param_count; k++) {
77                 
78                 if (csig->pinvoke)
79                         size = mono_type_native_stack_size (csig->params [k], &align);
80                 else
81                         size = mono_type_stack_size (csig->params [k], &align);
82
83                 /* ignore alignment for now */
84                 align = 1;
85
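                /*
                 * pad is the number of bytes needed to round frame_size up to
                 * the next multiple of align (align is assumed to be a power
                 * of two), i.e. pad == (align - frame_size % align) % align.
                 */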
86                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
87                 arg_info [k].pad = pad;
88                 frame_size += size;
89                 arg_info [k + 1].pad = 0;
90                 arg_info [k + 1].size = size;
91                 offset += pad;
92                 arg_info [k + 1].offset = offset;
93                 offset += size;
94         }
95
96         align = MONO_ARCH_FRAME_ALIGNMENT;
97         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
98         arg_info [k].pad = pad;
99
100         return frame_size;
101 }
102
103 static int indent_level = 0;
104
105 static void indent (int diff) {
106         int v = indent_level;
107         while (v-- > 0) {
108                 printf (". ");
109         }
110         indent_level += diff;
111 }
112
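/*
 * enter_method () and leave_method () are the method tracing helpers: they
 * print one line per call/return with the decoded arguments and return value.
 * The exact output depends on the signature; a typical ENTER line looks
 * roughly like ". ENTER: System.Object:ToString ()(this:0x80a1234[System.Object], )".
 */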
113 static void
114 enter_method (MonoMethod *method, char *ebp)
115 {
116         int i, j;
117         MonoClass *class;
118         MonoObject *o;
119         MonoJitArgumentInfo *arg_info;
120         MonoMethodSignature *sig;
121         char *fname;
122
123         fname = mono_method_full_name (method, TRUE);
124         indent (1);
125         printf ("ENTER: %s(", fname);
126         g_free (fname);
127         
128         if (((int)ebp & (MONO_ARCH_FRAME_ALIGNMENT - 1)) != 0) {
129                 g_error ("unaligned stack detected (%p)", ebp);
130         }
131
132         sig = method->signature;
133
134         arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
135
136         arch_get_argument_info (sig, sig->param_count, arg_info);
137
138         if (MONO_TYPE_ISSTRUCT (method->signature->ret)) {
139                 g_assert (!method->signature->ret->byref);
140
141                 printf ("VALUERET:%p, ", *((gpointer *)(ebp + 8)));
142         }
143
144         if (method->signature->hasthis) {
145                 gpointer *this = (gpointer *)(ebp + arg_info [0].offset);
146                 if (method->klass->valuetype) {
147                         printf ("value:%p, ", *this);
148                 } else {
149                         o = *((MonoObject **)this);
150
151                         if (o) {
152                                 class = o->vtable->klass;
153
154                                 if (class == mono_defaults.string_class) {
155                                         printf ("this:[STRING:%p:%s], ", o, mono_string_to_utf8 ((MonoString *)o));
156                                 } else {
157                                         printf ("this:%p[%s.%s], ", o, class->name_space, class->name);
158                                 }
159                         } else 
160                                 printf ("this:NULL, ");
161                 }
162         }
163
164         for (i = 0; i < method->signature->param_count; ++i) {
165                 gpointer *cpos = (gpointer *)(ebp + arg_info [i + 1].offset);
166                 int size = arg_info [i + 1].size;
167
168                 MonoType *type = method->signature->params [i];
169                 
170                 if (type->byref) {
171                         printf ("[BYREF:%p], ", *cpos); 
172                 } else switch (type->type) {
173                         
174                 case MONO_TYPE_I:
175                 case MONO_TYPE_U:
176                         printf ("%p, ", (gpointer)*((int *)(cpos)));
177                         break;
178                 case MONO_TYPE_BOOLEAN:
179                 case MONO_TYPE_CHAR:
180                 case MONO_TYPE_I1:
181                 case MONO_TYPE_U1:
182                 case MONO_TYPE_I2:
183                 case MONO_TYPE_U2:
184                 case MONO_TYPE_I4:
185                 case MONO_TYPE_U4:
186                         printf ("%d, ", *((int *)(cpos)));
187                         break;
188                 case MONO_TYPE_STRING: {
189                         MonoString *s = *((MonoString **)cpos);
190                         if (s) {
191                                 g_assert (((MonoObject *)s)->vtable->klass == mono_defaults.string_class);
192                                 printf ("[STRING:%p:%s], ", s, mono_string_to_utf8 (s));
193                         } else 
194                                 printf ("[STRING:null], ");
195                         break;
196                 }
197                 case MONO_TYPE_CLASS:
198                 case MONO_TYPE_OBJECT: {
199                         o = *((MonoObject **)cpos);
200                         if (o) {
201                                 class = o->vtable->klass;
202                     
203                                 if (class == mono_defaults.string_class) {
204                                         printf ("[STRING:%p:%s], ", o, mono_string_to_utf8 ((MonoString *)o));
205                                 } else if (class == mono_defaults.int32_class) {
206                                         printf ("[INT32:%p:%d], ", o, *(gint32 *)((char *)o + sizeof (MonoObject)));
207                                 } else
208                                         printf ("[%s.%s:%p], ", class->name_space, class->name, o);
209                         } else {
210                                 printf ("%p, ", *((gpointer *)(cpos)));                         
211                         }
212                         break;
213                 }
214                 case MONO_TYPE_PTR:
215                 case MONO_TYPE_FNPTR:
216                 case MONO_TYPE_ARRAY:
217                 case MONO_TYPE_SZARRAY:
218                         printf ("%p, ", *((gpointer *)(cpos)));
219                         break;
220                 case MONO_TYPE_I8:
221                 case MONO_TYPE_U8:
222                         printf ("0x%016llx, ", *((gint64 *)(cpos)));
223                         break;
224                 case MONO_TYPE_R4:
225                         printf ("%f, ", *((float *)(cpos)));
226                         break;
227                 case MONO_TYPE_R8:
228                         printf ("%f, ", *((double *)(cpos)));
229                         break;
230                 case MONO_TYPE_VALUETYPE: 
231                         printf ("[");
232                         for (j = 0; j < size; j++)
233                                 printf ("%02x,", *((guint8*)cpos +j));
234                         printf ("], ");
235                         break;
236                 default:
237                         printf ("XX, ");
238                 }
239         }
240
241         printf (")\n");
242 }
243
244 static void
245 leave_method (MonoMethod *method, ...)
246 {
247         MonoType *type;
248         char *fname;
249         va_list ap;
250
251         va_start(ap, method);
252
253         fname = mono_method_full_name (method, TRUE);
254         indent (-1);
255         printf ("LEAVE: %s", fname);
256         g_free (fname);
257
258         type = method->signature->ret;
259
260 handle_enum:
261         switch (type->type) {
262         case MONO_TYPE_VOID:
263                 break;
264         case MONO_TYPE_BOOLEAN: {
265                 int eax = va_arg (ap, int);
266                 if (eax)
267                         printf ("TRUE:%d", eax);
268                 else 
269                         printf ("FALSE");
270                         
271                 break;
272         }
273         case MONO_TYPE_CHAR:
274         case MONO_TYPE_I1:
275         case MONO_TYPE_U1:
276         case MONO_TYPE_I2:
277         case MONO_TYPE_U2:
278         case MONO_TYPE_I4:
279         case MONO_TYPE_U4:
280         case MONO_TYPE_I:
281         case MONO_TYPE_U: {
282                 int eax = va_arg (ap, int);
283                 printf ("EAX=%d", eax);
284                 break;
285         }
286         case MONO_TYPE_STRING: {
287                 MonoString *s = va_arg (ap, MonoString *);
288
289                 if (s) {
290                         g_assert (((MonoObject *)s)->vtable->klass == mono_defaults.string_class);
291                         printf ("[STRING:%p:%s]", s, mono_string_to_utf8 (s));
292                 } else 
293                         printf ("[STRING:null], ");
294                 break;
295         }
296         case MONO_TYPE_CLASS: 
297         case MONO_TYPE_OBJECT: {
298                 MonoObject *o = va_arg (ap, MonoObject *);
299
300                 if (o) {
301                         if (o->vtable->klass == mono_defaults.boolean_class) {
302                                 printf ("[BOOLEAN:%p:%d]", o, *((guint8 *)o + sizeof (MonoObject)));            
303                         } else if  (o->vtable->klass == mono_defaults.int32_class) {
304                                 printf ("[INT32:%p:%d]", o, *((gint32 *)((char *)o + sizeof (MonoObject))));    
305                         } else if  (o->vtable->klass == mono_defaults.int64_class) {
306                                 printf ("[INT64:%p:%lld]", o, *((gint64 *)((char *)o + sizeof (MonoObject))));  
307                         } else
308                                 printf ("[%s.%s:%p]", o->vtable->klass->name_space, o->vtable->klass->name, o);
309                 } else
310                         printf ("[OBJECT:%p]", o);
311                
312                 break;
313         }
314         case MONO_TYPE_PTR:
315         case MONO_TYPE_FNPTR:
316         case MONO_TYPE_ARRAY:
317         case MONO_TYPE_SZARRAY: {
318                 gpointer p = va_arg (ap, gpointer);
319                 printf ("EAX=%p", p);
320                 break;
321         }
322         case MONO_TYPE_I8: {
323                 gint64 l =  va_arg (ap, gint64);
324                 printf ("EAX/EDX=0x%16llx", l);
325                 break;
326         }
327         case MONO_TYPE_U8: {
328                 gint64 l =  va_arg (ap, gint64);
329                 printf ("EAX/EDX=0x%16llx", l);
330                 break;
331         }
332         case MONO_TYPE_R8: {
333                 double f = va_arg (ap, double);
334                 printf ("FP=%f\n", f);
335                 break;
336         }
337         case MONO_TYPE_VALUETYPE: 
338                 if (type->data.klass->enumtype) {
339                         type = type->data.klass->enum_basetype;
340                         goto handle_enum;
341                 } else {
342                         guint8 *p = va_arg (ap, gpointer);
343                         int j, size, align;
344                         size = mono_type_size (type, &align);
345                         printf ("[");
346                         for (j = 0; p && j < size; j++)
347                                 printf ("%02x,", p [j]);
348                         printf ("]");
349                 }
350                 break;
351         default:
352                 printf ("(unknown return type %x)", method->signature->ret->type);
353         }
354
355         printf ("\n");
356 }
357
358 static const guchar cpuid_impl [] = {
359         0x55,                           /* push   %ebp */
360         0x89, 0xe5,                     /* mov    %esp,%ebp */
361         0x53,                           /* push   %ebx */
362         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
363         0x0f, 0xa2,                     /* cpuid   */
364         0x50,                           /* push   %eax */
365         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
366         0x89, 0x18,                     /* mov    %ebx,(%eax) */
367         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
368         0x89, 0x08,                     /* mov    %ecx,(%eax) */
369         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
370         0x89, 0x10,                     /* mov    %edx,(%eax) */
371         0x58,                           /* pop    %eax */
372         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
373         0x89, 0x02,                     /* mov    %eax,(%edx) */
374         0x5b,                           /* pop    %ebx */
375         0xc9,                           /* leave   */
376         0xc3,                           /* ret     */
377 };
378
379 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
380
381 static int 
382 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
383 {
384         int have_cpuid = 0;
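        /*
         * Detect CPUID support: bit 21 of EFLAGS (the ID flag) can only be
         * toggled if the CPU implements the CPUID instruction, so we flip it
         * and check whether the change sticks.
         */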
385         __asm__  __volatile__ (
386                 "pushfl\n"
387                 "popl %%eax\n"
388                 "movl %%eax, %%edx\n"
389                 "xorl $0x200000, %%eax\n"
390                 "pushl %%eax\n"
391                 "popfl\n"
392                 "pushfl\n"
393                 "popl %%eax\n"
394                 "xorl %%edx, %%eax\n"
395                 "andl $0x200000, %%eax\n"
396                 "movl %%eax, %0"
397                 : "=r" (have_cpuid)
398                 :
399                 : "%eax", "%edx"
400         );
401
402         if (have_cpuid) {
403                 CpuidFunc func = (CpuidFunc)cpuid_impl;
404                 func (id, p_eax, p_ebx, p_ecx, p_edx);
405                 /*
406                  * We use this approach because of issues with gcc and pic code, see:
407                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
408                 __asm__ __volatile__ ("cpuid"
409                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
410                         : "a" (id));
411                 */
412                 return 1;
413         }
414         return 0;
415 }
416
417 /*
418  * This function returns the optimizations supported on this cpu.
419  */
420 guint32
421 mono_arch_cpu_optimizazions (void)
422 {
423         int eax, ebx, ecx, edx;
424         guint32 opts = 0;
425
426         /* Feature Flags function, flags returned in EDX. */
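        /*
         * EDX bit 15 is the CMOV feature flag; FCMOV additionally requires an
         * FPU (EDX bit 0), hence the nested check below.
         */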
427         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
428                 if (edx & (1 << 15)) {
429                         opts |= MONO_OPT_CMOV;
430                         if (edx & 1)
431                                 opts |= MONO_OPT_FCMOV;
432                 }
433         }
434         return opts;
435 }
436
437 static gboolean
438 is_regsize_var (MonoType *t) {
439         if (t->byref)
440                 return TRUE;
441         switch (t->type) {
442         case MONO_TYPE_I4:
443         case MONO_TYPE_U4:
444         case MONO_TYPE_I:
445         case MONO_TYPE_U:
446                 return TRUE;
447         case MONO_TYPE_OBJECT:
448         case MONO_TYPE_STRING:
449         case MONO_TYPE_CLASS:
450         case MONO_TYPE_SZARRAY:
451         case MONO_TYPE_ARRAY:
452                 return FALSE;
453         case MONO_TYPE_VALUETYPE:
454                 if (t->data.klass->enumtype)
455                         return is_regsize_var (t->data.klass->enum_basetype);
456                 return FALSE;
457         }
458         return FALSE;
459 }
460
461 GList *
462 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
463 {
464         GList *vars = NULL;
465         int i;
466
467         for (i = 0; i < cfg->num_varinfo; i++) {
468                 MonoInst *ins = cfg->varinfo [i];
469                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
470
471                 /* unused vars */
472                 if (vmv->range.first_use.abs_pos > vmv->range.last_use.abs_pos)
473                         continue;
474
475                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
476                         continue;
477
478                 /* we can only allocate 32 bit values */
479                 if (is_regsize_var (ins->inst_vtype)) {
480                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
481                         g_assert (i == vmv->idx);
482                         vars = mono_varlist_insert_sorted (cfg, vars, vmv, FALSE);
483                 }
484         }
485
486         return vars;
487 }
488
489 GList *
490 mono_arch_get_global_int_regs (MonoCompile *cfg)
491 {
492         GList *regs = NULL;
493
494         /* we can use 3 registers for global allocation */
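        /* EBX, ESI and EDI are the callee-saved registers available for allocation (EBP is reserved as the frame pointer) */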
495         regs = g_list_prepend (regs, (gpointer)X86_EBX);
496         regs = g_list_prepend (regs, (gpointer)X86_ESI);
497         regs = g_list_prepend (regs, (gpointer)X86_EDI);
498
499         return regs;
500 }
501  
502 /*
503  * Set var information according to the calling convention. X86 version.
504  * The locals var stuff should most likely be split in another method.
505  */
506 void
507 mono_arch_allocate_vars (MonoCompile *m)
508 {
509         MonoMethodSignature *sig;
510         MonoMethodHeader *header;
511         MonoInst *inst;
512         int i, offset, size, align, curinst;
513
514         header = ((MonoMethodNormal *)m->method)->header;
515
516         sig = m->method->signature;
517         
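        /*
         * Incoming arguments live at positive offsets from EBP: the saved EBP
         * is at 0(%ebp) and the return address at 4(%ebp), so the first
         * argument starts at offset 8. Locals get negative offsets further down.
         */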
518         offset = 8;
519         curinst = 0;
520         if (MONO_TYPE_ISSTRUCT (sig->ret)) {
521                 m->ret->opcode = OP_REGOFFSET;
522                 m->ret->inst_basereg = X86_EBP;
523                 m->ret->inst_offset = offset;
524                 offset += sizeof (gpointer);
525         } else {
526                 /* FIXME: handle long and FP values */
527                 switch (sig->ret->type) {
528                 case MONO_TYPE_VOID:
529                         break;
530                 default:
531                         m->ret->opcode = OP_REGVAR;
532                         m->ret->inst_c0 = X86_EAX;
533                         break;
534                 }
535         }
536         if (sig->hasthis) {
537                 inst = m->varinfo [curinst];
538                 if (inst->opcode != OP_REGVAR) {
539                         inst->opcode = OP_REGOFFSET;
540                         inst->inst_basereg = X86_EBP;
541                 }
542                 inst->inst_offset = offset;
543                 offset += sizeof (gpointer);
544                 curinst++;
545         }
546
547         for (i = 0; i < sig->param_count; ++i) {
548                 inst = m->varinfo [curinst];
549                 if (inst->opcode != OP_REGVAR) {
550                         inst->opcode = OP_REGOFFSET;
551                         inst->inst_basereg = X86_EBP;
552                 }
553                 inst->inst_offset = offset;
554                 size = mono_type_size (sig->params [i], &align);
555                 size += 4 - 1;
556                 size &= ~(4 - 1);
557                 offset += size;
558                 curinst++;
559         }
560
561         offset = 0;
562         /* reserve space to save LMF and caller saved registers */
563         offset += sizeof (MonoLMF);
564
565         /* reserve space to store the esp */
566         offset += sizeof (gpointer);
567
568         /* this is a global constant */
569         mono_exc_esp_offset = -offset;
570
571         for (i = curinst; i < m->num_varinfo; ++i) {
572                 inst = m->varinfo [i];
573
574                 if ((inst->flags & MONO_INST_IS_DEAD) || inst->opcode == OP_REGVAR)
575                         continue;
576
577                 /* inst->unused indicates native sized value types; this is used by the
578                  * pinvoke wrappers when they call functions returning structures */
579                 if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype))
580                         size = mono_class_native_size (inst->inst_vtype->data.klass, &align);
581                 else
582                         size = mono_type_size (inst->inst_vtype, &align);
583
584                 offset += size;
585                 offset += align - 1;
586                 offset &= ~(align - 1);
587                 inst->opcode = OP_REGOFFSET;
588                 inst->inst_basereg = X86_EBP;
589                 inst->inst_offset = -offset;
590                 //g_print ("allocating local %d to %d\n", i, -offset);
591         }
592         offset += 3;
593         offset &= ~3;
594
595
596         /* change sign? */
597         m->stack_offset = -offset;
598 }
599
600 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
601  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
602  */
603
604 /* 
605  * take the arguments and generate the arch-specific
606  * instructions to properly call the function in call.
607  * This includes pushing, moving arguments to the right register
608  * etc.
609  * Issue: who does the spilling if needed, and when?
610  */
611 MonoCallInst*
612 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
613         MonoInst *arg, *in, **rev_args;
614         MonoMethodSignature *sig;
615         int i, n, stack_size, type;
616         MonoType *ptype;
617
618         sig = call->signature;
619         n = sig->param_count + sig->hasthis;
620         rev_args = mono_mempool_alloc (cfg->mempool, sizeof (MonoInst*) * n);
621         
622         if (sig->ret && (sig->ret->type == MONO_TYPE_I8 || sig->ret->type == MONO_TYPE_U8)) {
623                 //g_warning ("long value returned");
624         }
625         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
626                 stack_size = 4;
627         else
628                 stack_size = 0;
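        /*
         * On x86 arguments are pushed right to left, so the OP_OUTARG
         * instructions are collected in reverse order in rev_args and
         * assigned back to call->args at the end.
         */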
629         for (i = 0; i < n; ++i) {
630                 if (is_virtual && i == 0) {
631                         /* the argument will be attached to the call instruction */
632                         rev_args [n - 1] = arg = NULL;
633                         in = call->args [i];
634                         stack_size += 4;
635                 } else {
636                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
637                         in = call->args [i];
638                         arg->cil_code = in->cil_code;
639                         arg->inst_left = in;
640                         arg->type = in->type;
641                         rev_args [n - i - 1] = arg;
642                         if (i >= sig->hasthis) {
643                                 ptype = sig->params [i - sig->hasthis];
644                                 if (ptype->byref)
645                                         type = MONO_TYPE_U;
646                                 else
647                                         type = ptype->type;
648 handle_enum:
649                                 /* FIXME: validate arguments... */
650                                 switch (type) {
651                                 case MONO_TYPE_I:
652                                 case MONO_TYPE_U:
653                                 case MONO_TYPE_BOOLEAN:
654                                 case MONO_TYPE_CHAR:
655                                 case MONO_TYPE_I1:
656                                 case MONO_TYPE_U1:
657                                 case MONO_TYPE_I2:
658                                 case MONO_TYPE_U2:
659                                 case MONO_TYPE_I4:
660                                 case MONO_TYPE_U4:
661                                 case MONO_TYPE_STRING:
662                                 case MONO_TYPE_CLASS:
663                                 case MONO_TYPE_OBJECT:
664                                 case MONO_TYPE_PTR:
665                                 case MONO_TYPE_FNPTR:
666                                 case MONO_TYPE_ARRAY:
667                                 case MONO_TYPE_SZARRAY:
668                                         stack_size += 4;
669                                         break;
670                                 case MONO_TYPE_I8:
671                                 case MONO_TYPE_U8:
672                                         stack_size += 8;
673                                         break;
674                                 case MONO_TYPE_R4:
675                                         stack_size += 4;
676                                         arg->opcode = OP_OUTARG_R4;
677                                         break;
678                                 case MONO_TYPE_R8:
679                                         stack_size += 8;
680                                         arg->opcode = OP_OUTARG_R8;
681                                         break;
682                                 case MONO_TYPE_VALUETYPE:
683                                         if (MONO_TYPE_ISSTRUCT (ptype)) {
684                                                 int size;
685                                                 if (sig->pinvoke) 
686                                                         size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
687                                                 else 
688                                                         size = mono_type_stack_size (&in->klass->byval_arg, NULL);
689                                                 stack_size += size;
690                                                 arg->opcode = OP_OUTARG_VT;
691                                                 arg->klass = in->klass;
692                                                 arg->unused = sig->pinvoke;
693                                                 arg->inst_imm = size; 
694                                         } else {
695                                                 type = ptype->data.klass->enum_basetype->type;
696                                                 goto handle_enum;
697                                         }
698                                         break;
699                                 default:
700                                         g_warning ("unknown type 0x%02x\n", type);
701                                         g_assert_not_reached ();
702                                 }
703                         } else {
704                                 /* the this argument */
705                                 stack_size += 4;
706                         }
707                 }
708         }
709         /* they need to be pushed in reverse order */
710         call->args = rev_args;
711         call->stack_usage = stack_size;
712         /* 
713          * should set more info in call, such as the stack space
714          * used by the args that needs to be added back to esp
715          */
716
717         return call;
718 }
719
720 /*
721  * Allow tracing to work with this interface (with an optional argument)
722  */
723
724 /*
725  * This may be needed on some archs or for debugging support.
726  */
727 void
728 mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
729 {
730         /* no stack room needed now (may be needed for FASTCALL-trace support) */
731         *stack = 0;
732         /* split prolog-epilog requirements? */
733         *code = 50; /* max bytes needed: check this number */
734 }
735
736 void*
737 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
738 {
739         guchar *code = p;
740
741         /* if some args are passed in registers, we need to save them here */
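        /*
         * This emits, in effect: push %ebp; push $method; call func; add $8, %esp
         * i.e. func is called as func (method, ebp), matching the signature of
         * enter_method () above.
         */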
742         x86_push_reg (code, X86_EBP);
743         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
744         x86_push_imm (code, cfg->method);
745         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
746         x86_call_code (code, 0);
747         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
748
749         return code;
750 }
751
752 enum {
753         SAVE_NONE,
754         SAVE_STRUCT,
755         SAVE_EAX,
756         SAVE_EAX_EDX,
757         SAVE_FP
758 };
759
760 void*
761 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
762 {
763         guchar *code = p;
764         int arg_size = 0, save_mode = SAVE_NONE;
765         MonoMethod *method = cfg->method;
766         int rtype = method->signature->ret->type;
767         
768 handle_enum:
769         switch (rtype) {
770         case MONO_TYPE_VOID:
771                 /* special case string .ctor icall */
772                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
773                         save_mode = SAVE_EAX;
774                 else
775                         save_mode = SAVE_NONE;
776                 break;
777         case MONO_TYPE_I8:
778         case MONO_TYPE_U8:
779                 save_mode = SAVE_EAX_EDX;
780                 break;
781         case MONO_TYPE_R4:
782         case MONO_TYPE_R8:
783                 save_mode = SAVE_FP;
784                 break;
785         case MONO_TYPE_VALUETYPE:
786                 if (method->signature->ret->data.klass->enumtype) {
787                         rtype = method->signature->ret->data.klass->enum_basetype->type;
788                         goto handle_enum;
789                 }
790                 save_mode = SAVE_STRUCT;
791                 break;
792         default:
793                 save_mode = SAVE_EAX;
794                 break;
795         }
796
797         switch (save_mode) {
798         case SAVE_EAX_EDX:
799                 x86_push_reg (code, X86_EDX);
800                 x86_push_reg (code, X86_EAX);
801                 if (enable_arguments) {
802                         x86_push_reg (code, X86_EDX);
803                         x86_push_reg (code, X86_EAX);
804                         arg_size = 8;
805                 }
806                 break;
807         case SAVE_EAX:
808                 x86_push_reg (code, X86_EAX);
809                 if (enable_arguments) {
810                         x86_push_reg (code, X86_EAX);
811                         arg_size = 4;
812                 }
813                 break;
814         case SAVE_FP:
815                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
816                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
817                 if (enable_arguments) {
818                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
819                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
820                         arg_size = 8;
821                 }
822                 break;
823         case SAVE_STRUCT:
824                 if (enable_arguments) {
825                         x86_push_membase (code, X86_EBP, 8);
826                         arg_size = 4;
827                 }
828                 break;
829         case SAVE_NONE:
830         default:
831                 break;
832         }
833
834
835         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
836         x86_push_imm (code, method);
837         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
838         x86_call_code (code, 0);
839         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
840
841         switch (save_mode) {
842         case SAVE_EAX_EDX:
843                 x86_pop_reg (code, X86_EAX);
844                 x86_pop_reg (code, X86_EDX);
845                 break;
846         case SAVE_EAX:
847                 x86_pop_reg (code, X86_EAX);
848                 break;
849         case SAVE_FP:
850                 x86_fld_membase (code, X86_ESP, 0, TRUE);
851                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
852                 break;
853         case SAVE_NONE:
854         default:
855                 break;
856         }
857
858         return code;
859 }
860
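/*
 * Emit a conditional branch to the target basic block (or label): if the
 * target's native offset is already known a direct branch is emitted,
 * otherwise a patch info entry is recorded and a branch with a 0 displacement
 * is emitted to be patched later (an 8 bit form is used when the target is
 * known to be close and MONO_OPT_BRANCH is enabled).
 */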
861 #define EMIT_COND_BRANCH(ins,cond,sign) \
862 if (ins->flags & MONO_INST_BRLABEL) { \
863         if (ins->inst_i0->inst_c0) { \
864                 x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
865         } else { \
866                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
867                 x86_branch32 (code, cond, 0, sign); \
868         } \
869 } else { \
870         if (ins->inst_true_bb->native_offset) { \
871                 x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
872         } else { \
873                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
874                 if ((cfg->opt & MONO_OPT_BRANCH) && \
875                     x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
876                         x86_branch8 (code, cond, 0, sign); \
877                 else \
878                         x86_branch32 (code, cond, 0, sign); \
879         } \
880 }
881
882 /* emit an exception if the condition fails */
883 #define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
884         do {                                                        \
885                 mono_add_patch_info (cfg, code - cfg->native_code,   \
886                                     MONO_PATCH_INFO_EXC, exc_name);  \
887                 x86_branch32 (code, cond, 0, signed);               \
888         } while (0)
889
890 #define EMIT_FPCOMPARE(code) do { \
891         x86_fcompp (code); \
892         x86_fnstsw (code); \
893 } while (0)
894
895 static void
896 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
897 {
898         MonoInst *ins, *last_ins = NULL;
899         ins = bb->code;
900
901         while (ins) {
902
903                 switch (ins->opcode) {
904                 case OP_ICONST:
905                         /* reg = 0 -> XOR (reg, reg) */
906                         /* XOR sets the condition flags on x86, so we can't always use it */
907                         if (ins->inst_c0 == 0 && ins->next &&
908                             (ins->next->opcode == CEE_BR)) { 
909                                 ins->opcode = CEE_XOR;
910                                 ins->sreg1 = ins->dreg;
911                                 ins->sreg2 = ins->dreg;
912                         }
913                         break;
914                 case OP_MUL_IMM: 
915                         /* remove unnecessary multiplication with 1 */
916                         if (ins->inst_imm == 1) {
917                                 if (ins->dreg != ins->sreg1) {
918                                         ins->opcode = OP_MOVE;
919                                 } else {
920                                         last_ins->next = ins->next;                             
921                                         ins = ins->next;                                
922                                         continue;
923                                 }
924                         }
925                         break;
926                 case OP_COMPARE_IMM:
927                         /* OP_COMPARE_IMM (reg, 0) --> OP_X86_TEST_NULL (reg) */
928                         if (ins->inst_imm == 0 && ins->next &&
929                             (ins->next->opcode == CEE_BEQ || ins->next->opcode == CEE_BNE_UN ||
930                              ins->next->opcode == OP_CEQ)) {
931                                 ins->opcode = OP_X86_TEST_NULL;
932                         }     
933                         break;
934                 case OP_LOAD_MEMBASE:
935                 case OP_LOADI4_MEMBASE:
936                         /* 
937                          * OP_STORE_MEMBASE_REG reg, offset(basereg) 
938                          * OP_LOAD_MEMBASE offset(basereg), reg
939                          */
940                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
941                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
942                             ins->inst_basereg == last_ins->inst_destbasereg &&
943                             ins->inst_offset == last_ins->inst_offset) {
944                                 if (ins->dreg == last_ins->sreg1) {
945                                         last_ins->next = ins->next;                             
946                                         ins = ins->next;                                
947                                         continue;
948                                 } else {
949                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
950                                         ins->opcode = OP_MOVE;
951                                         ins->sreg1 = last_ins->sreg1;
952                                 }
953
954                         /* 
955                          * Note: reg1 must be different from the basereg in the second load
956                          * OP_LOAD_MEMBASE offset(basereg), reg1
957                          * OP_LOAD_MEMBASE offset(basereg), reg2
958                          * -->
959                          * OP_LOAD_MEMBASE offset(basereg), reg1
960                          * OP_MOVE reg1, reg2
961                          */
962                         } else if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
963                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
964                               ins->inst_basereg != last_ins->dreg &&
965                               ins->inst_basereg == last_ins->inst_basereg &&
966                               ins->inst_offset == last_ins->inst_offset) {
967
968                                 if (ins->dreg == last_ins->dreg) {
969                                         last_ins->next = ins->next;                             
970                                         ins = ins->next;                                
971                                         continue;
972                                 } else {
973                                         ins->opcode = OP_MOVE;
974                                         ins->sreg1 = last_ins->dreg;
975                                 }
976
977                                 //g_assert_not_reached ();
978
979 #if 0
980                         /* 
981                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
982                          * OP_LOAD_MEMBASE offset(basereg), reg
983                          * -->
984                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
985                          * OP_ICONST reg, imm
986                          */
987                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
988                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
989                                    ins->inst_basereg == last_ins->inst_destbasereg &&
990                                    ins->inst_offset == last_ins->inst_offset) {
991                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
992                                 ins->opcode = OP_ICONST;
993                                 ins->inst_c0 = last_ins->inst_imm;
994                                 g_assert_not_reached (); // check this rule
995 #endif
996                         }
997                         break;
998                 case OP_LOADU1_MEMBASE:
999                 case OP_LOADI1_MEMBASE:
1000                   /*
1001                    * Same store-to-load forwarding as the 32 bit case above, applied to byte values.
1002                    */
1003                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
1004                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1005                                         ins->inst_offset == last_ins->inst_offset) {
1006                                 if (ins->dreg == last_ins->sreg1) {
1007                                         last_ins->next = ins->next;                             
1008                                         ins = ins->next;                                
1009                                         continue;
1010                                 } else {
1011                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1012                                         ins->opcode = OP_MOVE;
1013                                         ins->sreg1 = last_ins->sreg1;
1014                                 }
1015                         }
1016                         break;
1017                 case OP_LOADU2_MEMBASE:
1018                 case OP_LOADI2_MEMBASE:
1019                   /*
1020                    * Same store-to-load forwarding as the 32 bit case above, applied to 16 bit values.
1021                    */
1022                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
1023                                         ins->inst_basereg == last_ins->inst_destbasereg &&
1024                                         ins->inst_offset == last_ins->inst_offset) {
1025                                 if (ins->dreg == last_ins->sreg1) {
1026                                         last_ins->next = ins->next;                             
1027                                         ins = ins->next;                                
1028                                         continue;
1029                                 } else {
1030                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
1031                                         ins->opcode = OP_MOVE;
1032                                         ins->sreg1 = last_ins->sreg1;
1033                                 }
1034                         }
1035                         break;
1036                 case CEE_CONV_I4:
1037                 case CEE_CONV_U4:
1038                 case OP_MOVE:
1039                         /* 
1040                          * OP_MOVE reg, reg 
1041                          */
1042                         if (ins->dreg == ins->sreg1) {
1043                                 if (last_ins)
1044                                         last_ins->next = ins->next;                             
1045                                 ins = ins->next;
1046                                 continue;
1047                         }
1048                         /* 
1049                          * OP_MOVE sreg, dreg 
1050                          * OP_MOVE dreg, sreg
1051                          */
1052                         if (last_ins && last_ins->opcode == OP_MOVE &&
1053                             ins->sreg1 == last_ins->dreg &&
1054                             ins->dreg == last_ins->sreg1) {
1055                                 last_ins->next = ins->next;                             
1056                                 ins = ins->next;                                
1057                                 continue;
1058                         }
1059                         break;
1060                 }
1061                 last_ins = ins;
1062                 ins = ins->next;
1063         }
1064         bb->last_ins = last_ins;
1065 }
1066
1067 static const int 
1068 branch_cc_table [] = {
1069         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1070         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1071         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1072 };
1073
1074 #define DEBUG(a) if (cfg->verbose_level > 1) a
1075 //#define DEBUG(a)
1076 #define reg_is_freeable(r) ((r) >= 0 && (r) <= 7 && X86_IS_CALLEE ((r)))
1077
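/*
 * Per-register liveness info collected by the forward pass in
 * mono_arch_local_regalloc (): the instruction indexes where the register is
 * first written (born_in), where its value is defined (killed_in) and the
 * two most recent instructions that touched it (prev_use/last_use).
 */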
1078 typedef struct {
1079         int born_in;
1080         int killed_in;
1081         int last_use;
1082         int prev_use;
1083 } RegTrack;
1084
1085 static const char*const * ins_spec = pentium;
1086
1087 static void
1088 print_ins (int i, MonoInst *ins)
1089 {
1090         const char *spec = ins_spec [ins->opcode];
1091         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1092         if (spec [MONO_INST_DEST]) {
1093                 if (ins->dreg >= MONO_MAX_IREGS)
1094                         g_print (" R%d <-", ins->dreg);
1095                 else
1096                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1097         }
1098         if (spec [MONO_INST_SRC1]) {
1099                 if (ins->sreg1 >= MONO_MAX_IREGS)
1100                         g_print (" R%d", ins->sreg1);
1101                 else
1102                         g_print (" %s", mono_arch_regname (ins->sreg1));
1103         }
1104         if (spec [MONO_INST_SRC2]) {
1105                 if (ins->sreg2 >= MONO_MAX_IREGS)
1106                         g_print (" R%d", ins->sreg2);
1107                 else
1108                         g_print (" %s", mono_arch_regname (ins->sreg2));
1109         }
1110         if (spec [MONO_INST_CLOB])
1111                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1112         g_print ("\n");
1113 }
1114
1115 static void
1116 print_regtrack (RegTrack *t, int num)
1117 {
1118         int i;
1119         char buf [32];
1120         const char *r;
1121         
1122         for (i = 0; i < num; ++i) {
1123                 if (!t [i].born_in)
1124                         continue;
1125                 if (i >= MONO_MAX_IREGS) {
1126                         g_snprintf (buf, sizeof(buf), "R%d", i);
1127                         r = buf;
1128                 } else
1129                         r = mono_arch_regname (i);
1130                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1131         }
1132 }
1133
1134 typedef struct InstList InstList;
1135
1136 struct InstList {
1137         InstList *prev;
1138         InstList *next;
1139         MonoInst *data;
1140 };
1141
1142 static inline InstList*
1143 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1144 {
1145         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1146         item->data = data;
1147         item->prev = NULL;
1148         item->next = list;
1149         if (list)
1150                 list->prev = item;
1151         return item;
1152 }
1153
1154 /*
1155  * Force the spilling of the variable in the symbolic register 'reg'.
1156  */
1157 static int
1158 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1159 {
1160         MonoInst *load;
1161         int i, sel, spill;
1162         
1163         sel = cfg->rs->iassign [reg];
1164         /*i = cfg->rs->isymbolic [sel];
1165         g_assert (i == reg);*/
1166         i = reg;
1167         spill = ++cfg->spill_count;
1168         cfg->rs->iassign [i] = -spill - 1;
1169         mono_regstate_free_int (cfg->rs, sel);
1170         /* we need to create a spill var and insert a load to sel after the current instruction */
1171         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1172         load->dreg = sel;
1173         load->inst_basereg = X86_EBP;
1174         load->inst_offset = mono_spillvar_offset (cfg, spill);
1175         if (item->prev) {
1176                 while (ins->next != item->prev->data)
1177                         ins = ins->next;
1178         }
1179         load->next = ins->next;
1180         ins->next = load;
1181         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1182         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1183         g_assert (i == sel);
1184
1185         return sel;
1186 }
1187
1188 static int
1189 get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
1190 {
1191         MonoInst *load;
1192         int i, sel, spill;
1193
1194         DEBUG (g_print ("start regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
1195         /* exclude the registers in the current instruction */
1196         if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
1197                 if (ins->sreg1 >= MONO_MAX_IREGS)
1198                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
1199                 else
1200                         regmask &= ~ (1 << ins->sreg1);
1201                 DEBUG (g_print ("excluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
1202         }
1203         if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
1204                 if (ins->sreg2 >= MONO_MAX_IREGS)
1205                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
1206                 else
1207                         regmask &= ~ (1 << ins->sreg2);
1208                 DEBUG (g_print ("excluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
1209         }
1210         if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
1211                 regmask &= ~ (1 << ins->dreg);
1212                 DEBUG (g_print ("excluding dreg %s\n", mono_arch_regname (ins->dreg)));
1213         }
1214
1215         DEBUG (g_print ("available regmask: 0x%08x\n", regmask));
1216         g_assert (regmask); /* need at least a register we can free */
1217         sel = -1;
1218         /* we should track prev_use and spill the register that's farther */
1219         for (i = 0; i < MONO_MAX_IREGS; ++i) {
1220                 if (regmask & (1 << i)) {
1221                         sel = i;
1222                         DEBUG (g_print ("selected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
1223                         break;
1224                 }
1225         }
1226         i = cfg->rs->isymbolic [sel];
1227         spill = ++cfg->spill_count;
1228         cfg->rs->iassign [i] = -spill - 1;
1229         mono_regstate_free_int (cfg->rs, sel);
1230         /* we need to create a spill var and insert a load to sel after the current instruction */
1231         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1232         load->dreg = sel;
1233         load->inst_basereg = X86_EBP;
1234         load->inst_offset = mono_spillvar_offset (cfg, spill);
1235         if (item->prev) {
1236                 while (ins->next != item->prev->data)
1237                         ins = ins->next;
1238         }
1239         load->next = ins->next;
1240         ins->next = load;
1241         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1242         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1243         g_assert (i == sel);
1244         
1245         return sel;
1246 }
1247
1248 static MonoInst*
1249 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1250 {
1251         MonoInst *copy;
1252         MONO_INST_NEW (cfg, copy, OP_MOVE);
1253         copy->dreg = dest;
1254         copy->sreg1 = src;
1255         if (ins) {
1256                 copy->next = ins->next;
1257                 ins->next = copy;
1258         }
1259         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1260         return copy;
1261 }
1262
1263 static MonoInst*
1264 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1265 {
1266         MonoInst *store;
1267         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1268         store->sreg1 = reg;
1269         store->inst_destbasereg = X86_EBP;
1270         store->inst_offset = mono_spillvar_offset (cfg, spill);
1271         if (ins) {
1272                 store->next = ins->next;
1273                 ins->next = store;
1274         }
1275         DEBUG (g_print ("SPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1276         return store;
1277 }
1278
1279 static void
1280 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1281 {
1282         MonoInst *prev;
1283         if (item->next) {
1284                 prev = item->next->data;
1285
1286                 while (prev->next != ins)
1287                         prev = prev->next;
1288                 to_insert->next = ins;
1289                 prev->next = to_insert;
1290         } else {
1291                 to_insert->next = ins;
1292         }
1293         /* 
1294          * needed: otherwise, while processing the next instruction, an ins
1295          * appended at the end could end up past this newly inserted one.
1296          */
1297         item->data = to_insert; 
1298 }
1299
1300 #if  0
1301 static int
1302 alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
1303 {
1304         int val = cfg->rs->iassign [sym_reg];
1305         if (val < 0) {
1306                 int spill = 0;
1307                 if (val < -1) {
1308                         /* the register gets spilled after this inst */
1309                         spill = -val -1;
1310                 }
1311                 val = mono_regstate_alloc_int (cfg->rs, allow_mask);
1312                 if (val < 0)
1313                         val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
1314                 cfg->rs->iassign [sym_reg] = val;
1315                 /* add option to store before the instruction for src registers */
1316                 if (spill)
1317                         create_spilled_store (cfg, spill, val, sym_reg, ins);
1318         }
1319         cfg->rs->isymbolic [val] = sym_reg;
1320         return val;
1321 }
1322 #endif
1323
1324 #include "cprop.c"
1325
1326 /*
1327  * Local register allocation.
1328  * We first scan the list of instructions and we save the liveness info of
1329  * each register (when the register is first used, when its value is set, etc.).
1330  * We also reverse the list of instructions (in the InstList list) because assigning
1331  * registers backwards allows for more tricks to be used.
1332  */
1333 void
1334 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1335 {
1336         MonoInst *ins;
1337         MonoRegState *rs = cfg->rs;
1338         int i, val, fpcount;
1339         RegTrack *reginfo, *reginfof;
1340         RegTrack *reginfo1, *reginfo2, *reginfod;
1341         InstList *tmp, *reversed = NULL;
1342         const char *spec;
1343         guint32 src1_mask, src2_mask, dest_mask;
1344
1345         if (!bb->code)
1346                 return;
1347         rs->next_vireg = bb->max_ireg;
1348         rs->next_vfreg = bb->max_freg;
1349         mono_regstate_assign (rs);
1350         reginfo = mono_mempool_alloc0 (cfg->mempool, sizeof (RegTrack) * rs->next_vireg);
1351         reginfof = mono_mempool_alloc0 (cfg->mempool, sizeof (RegTrack) * rs->next_vfreg);
1352         rs->ifree_mask = X86_CALLEE_REGS;
1353
1354         ins = bb->code;
1355
1356         if (cfg->opt & MONO_OPT_COPYPROP)
1357                 local_copy_prop (cfg, ins);
1358         
1359         i = 1;
1360         fpcount = 0; /* FIXME: track fp stack utilization */
1361         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1362         /* forward pass on the instructions to collect register liveness info */
1363         while (ins) {
1364                 spec = ins_spec [ins->opcode];
1365                 DEBUG (print_ins (i, ins));
1366                 if (spec [MONO_INST_SRC1]) {
1367                         if (spec [MONO_INST_SRC1] == 'f')
1368                                 reginfo1 = reginfof;
1369                         else
1370                                 reginfo1 = reginfo;
1371                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1372                         reginfo1 [ins->sreg1].last_use = i;
1373                 } else {
1374                         ins->sreg1 = -1;
1375                 }
1376                 if (spec [MONO_INST_SRC2]) {
1377                         if (spec [MONO_INST_SRC2] == 'f')
1378                                 reginfo2 = reginfof;
1379                         else
1380                                 reginfo2 = reginfo;
1381                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1382                         reginfo2 [ins->sreg2].last_use = i;
1383                 } else {
1384                         ins->sreg2 = -1;
1385                 }
1386                 if (spec [MONO_INST_DEST]) {
1387                         if (spec [MONO_INST_DEST] == 'f')
1388                                 reginfod = reginfof;
1389                         else
1390                                 reginfod = reginfo;
1391                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1392                                 reginfod [ins->dreg].killed_in = i;
1393                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1394                         reginfod [ins->dreg].last_use = i;
1395                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1396                                 reginfod [ins->dreg].born_in = i;
1397                         if (spec [MONO_INST_DEST] == 'l') {
1398                                 /* result in eax:edx, the virtual register is allocated sequentially */
1399                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1400                                 reginfod [ins->dreg + 1].last_use = i;
1401                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1402                                         reginfod [ins->dreg + 1].born_in = i;
1403                         }
1404                 } else {
1405                         ins->dreg = -1;
1406                 }
1407                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
1408                 ++i;
1409                 ins = ins->next;
1410         }
1411
1412         DEBUG (print_regtrack (reginfo, rs->next_vireg));
1413         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
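             /*
              * Backward pass: walk the reversed instruction list and assign hard
              * registers, inserting spill stores and register copies as needed.
              */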
1414         tmp = reversed;
1415         while (tmp) {
1416                 int prev_dreg, prev_sreg1, prev_sreg2;
1417                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
1418                 --i;
1419                 ins = tmp->data;
1420                 spec = ins_spec [ins->opcode];
1421                 DEBUG (g_print ("processing:"));
1422                 DEBUG (print_ins (i, ins));
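                     /*
                      * clob == 's': shift instructions take their count in CL,
                      * so sreg2 must end up in ECX; spill or copy whatever is
                      * currently assigned to ECX if necessary.
                      */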
1423                 if (spec [MONO_INST_CLOB] == 's') {
1424                         if (rs->ifree_mask & (1 << X86_ECX)) {
1425                                 DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
1426                                 rs->iassign [ins->sreg2] = X86_ECX;
1427                                 rs->isymbolic [X86_ECX] = ins->sreg2;
1428                                 ins->sreg2 = X86_ECX;
1429                                 rs->ifree_mask &= ~ (1 << X86_ECX);
1430                         } else {
1431                                 int need_ecx_spill = TRUE;
1432                                 /* 
1433                                  * we first check if src1/dreg is already assigned a register
1434                                  * and then we force a spill of the var assigned to ECX.
1435                                  */
1436                                 /* the destination register can't be ECX */
1437                                 dest_mask &= ~ (1 << X86_ECX);
1438                                 src1_mask &= ~ (1 << X86_ECX);
1439                                 val = rs->iassign [ins->dreg];
1440                                 /* 
1441                                  * the destination register is already assigned to ECX:
1442                                  * we need to allocate another register for it and then
1443                                  * copy from this to ECX.
1444                                  */
1445                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
1446                                         int new_dest = mono_regstate_alloc_int (rs, dest_mask);
1447                                         if (new_dest < 0)
1448                                                 new_dest = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1449                                         g_assert (new_dest >= 0);
1450                                         ins->dreg = new_dest;
1451                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
1452                                         need_ecx_spill = FALSE;
1453                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
1454                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
1455                                         rs->iassign [ins->dreg] = val;
1456                                         rs->isymbolic [val] = prev_dreg;
1457                                         ins->dreg = val;*/
1458                                 }
1459                                 val = rs->iassign [ins->sreg1];
1460                                 if (val == X86_ECX) {
1461                                         g_assert_not_reached ();
1462                                 } else if (val >= 0) {
1463                                         /* 
1464                                          * the first src reg was already assigned to a register,
1465                                          * we need to copy it to the dest register because the 
1466                                          * shift instruction clobbers the first operand.
1467                                          */
1468                                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, val, NULL);
1469                                         insert_before_ins (ins, tmp, copy);
1470                                 }
1471                                 val = rs->iassign [ins->sreg2];
1472                                 if (val >= 0 && val != X86_ECX) {
1473                                         MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
1474                                         DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
1475                                         move->next = ins;
1476                                         g_assert_not_reached ();
1477                                         /* FIXME: where is move connected to the instruction list? */
1478                                         //tmp->prev->data->next = move;
1479                                 }
1480                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
1481                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
1482                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
1483                                         mono_regstate_free_int (rs, X86_ECX);
1484                                 }
1485                                 /* force-set sreg2 */
1486                                 rs->iassign [ins->sreg2] = X86_ECX;
1487                                 rs->isymbolic [X86_ECX] = ins->sreg2;
1488                                 ins->sreg2 = X86_ECX;
1489                                 rs->ifree_mask &= ~ (1 << X86_ECX);
1490                         }
1491                 } else if (spec [MONO_INST_CLOB] == 'd') { /* division */
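                             /*
                              * x86 div/idiv produces the quotient in EAX and the
                              * remainder in EDX, so force the destination into the
                              * right register and restrict the source masks below.
                              */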
1492                         int dest_reg = X86_EAX;
1493                         if (spec [MONO_INST_DEST] == 'd')
1494                 dest_reg = X86_EDX; /* remainder */
1495                         val = rs->iassign [ins->dreg];
1496                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
1497                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
1498                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
1499                                 mono_regstate_free_int (rs, dest_reg);
1500                         }
1501                         if (val < 0) {
1502                                 if (val < -1) {
1503                                         /* the register gets spilled after this inst */
1504                                         int spill = -val -1;
1505                                         dest_mask = 1 << (dest_reg == X86_EAX? X86_EDX: X86_EAX);
1506                                         prev_dreg = ins->dreg;
1507                                         val = mono_regstate_alloc_int (rs, dest_mask);
1508                                         if (val < 0)
1509                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1510                                         rs->iassign [ins->dreg] = val;
1511                                         if (spill)
1512                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
1513                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
1514                                         rs->isymbolic [val] = prev_dreg;
1515                                         ins->dreg = val;
1516                                         if (val != dest_reg) { /* force a copy */
1517                                                 create_copy_ins (cfg, val, dest_reg, ins);
1518                                         }
1519                                 } else {
1520                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
1521                                         rs->iassign [ins->dreg] = dest_reg;
1522                                         rs->isymbolic [dest_reg] = ins->dreg;
1523                                         ins->dreg = dest_reg;
1524                                         rs->ifree_mask &= ~ (1 << dest_reg);
1525                                 }
1526                         } else {
1527                                 //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
1528                                 if (val != dest_reg) { /* force a copy */
1529                                         create_copy_ins (cfg, val, dest_reg, ins);
1530                                 }
1531                         }
1532                         src1_mask = 1 << X86_EAX;
1533                         src2_mask = 1 << X86_ECX;
1534                 }
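                     /*
                      * A 'l' (long) destination is returned in the EAX:EDX pair,
                      * so make sure both registers are free here.
                      */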
1535                 if (spec [MONO_INST_DEST] == 'l') {
1536                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
1537                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
1538                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
1539                                 mono_regstate_free_int (rs, X86_EAX);
1540                         }
1541                         if (!(rs->ifree_mask & (1 << X86_EDX))) {
1542                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EDX]));
1543                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
1544                                 mono_regstate_free_int (rs, X86_EDX);
1545                         }
1546                 }
1547                 /* update for use with FP regs... */
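                     /*
                      * Assign a hard register to the destination vreg, emitting a
                      * spill store after the instruction when the liveness info
                      * flagged it for spilling.
                      */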
1548                 if (spec [MONO_INST_DEST] != 'f' && ins->dreg >= MONO_MAX_IREGS) {
1549                         val = rs->iassign [ins->dreg];
1550                         prev_dreg = ins->dreg;
1551                         if (val < 0) {
1552                                 int spill = 0;
1553                                 if (val < -1) {
1554                                         /* the register gets spilled after this inst */
1555                                         spill = -val -1;
1556                                 }
1557                                 val = mono_regstate_alloc_int (rs, dest_mask);
1558                                 if (val < 0)
1559                                         val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1560                                 rs->iassign [ins->dreg] = val;
1561                                 if (spill)
1562                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
1563                         }
1564                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
1565                         rs->isymbolic [val] = prev_dreg;
1566                         ins->dreg = val;
1567                         if (spec [MONO_INST_DEST] == 'l') {
1568                                 int hreg = prev_dreg + 1;
1569                                 val = rs->iassign [hreg];
1570                                 if (val < 0) {
1571                                         int spill = 0;
1572                                         if (val < -1) {
1573                                                 /* the register gets spilled after this inst */
1574                                                 spill = -val -1;
1575                                         }
1576                                         val = mono_regstate_alloc_int (rs, dest_mask);
1577                                         if (val < 0)
1578                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, hreg);
1579                                         rs->iassign [hreg] = val;
1580                                         if (spill)
1581                                                 create_spilled_store (cfg, spill, val, hreg, ins);
1582                                 }
1583                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
1584                                 rs->isymbolic [val] = hreg;
1585                                 /* FIXME:? ins->dreg = val; */
1586                                 if (ins->dreg == X86_EAX) {
1587                                         if (val != X86_EDX)
1588                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1589                                 } else if (ins->dreg == X86_EDX) {
1590                                         if (val == X86_EAX) {
1591                                                 /* swap */
1592                                                 g_assert_not_reached ();
1593                                         } else {
1594                                                 /* two forced copies */
1595                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1596                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1597                                         }
1598                                 } else {
1599                                         if (val == X86_EDX) {
1600                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1601                                         } else {
1602                                                 /* two forced copies */
1603                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1604                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1605                                         }
1606                                 }
1607                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
1608                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
1609                                         mono_regstate_free_int (rs, val);
1610                                 }
1611                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
1612                                 /* this instruction only outputs to EAX, need to copy */
1613                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1614                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
1615                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
1616                         }
1617                 } else {
1618                         prev_dreg = -1;
1619                 }
1620                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
1621                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
1622                         mono_regstate_free_int (rs, ins->dreg);
1623                 }
1624                 if (spec [MONO_INST_SRC1] != 'f' && ins->sreg1 >= MONO_MAX_IREGS) {
1625                         val = rs->iassign [ins->sreg1];
1626                         prev_sreg1 = ins->sreg1;
1627                         if (val < 0) {
1628                                 int spill = 0;
1629                                 if (val < -1) {
1630                                         /* the register gets spilled after this inst */
1631                                         spill = -val -1;
1632                                 }
1633                                 if (0 && ins->opcode == OP_MOVE) {
1634                                         /* 
1635                                          * small optimization: the dest register is already allocated
1636                                          * but the src one is not: we can simply assign the same register
1637                                          * here and peephole will get rid of the instruction later.
1638                                          * This optimization may interfere with the clobbering handling:
1639                                          * it removes a mov operation that will be added again to handle clobbering.
1640                                          * There are also some other issues that should be checked with make testjit.
1641                                          */
1642                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
1643                                         val = rs->iassign [ins->sreg1] = ins->dreg;
1644                                         //g_assert (val >= 0);
1645                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
1646                                 } else {
1647                                         //g_assert (val == -1); /* source cannot be spilled */
1648                                         val = mono_regstate_alloc_int (rs, src1_mask);
1649                                         if (val < 0)
1650                                                 val = get_register_spilling (cfg, tmp, ins, src1_mask, ins->sreg1);
1651                                         rs->iassign [ins->sreg1] = val;
1652                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
1653                                 }
1654                                 if (spill) {
1655                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
1656                                         insert_before_ins (ins, tmp, store);
1657                                 }
1658                         }
1659                         rs->isymbolic [val] = prev_sreg1;
1660                         ins->sreg1 = val;
1661                 } else {
1662                         prev_sreg1 = -1;
1663                 }
1664                 /* handle clobbering of sreg1 */
1665                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
1666                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
1667                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
1668                         if (ins->sreg2 == -1 || spec [MONO_INST_CLOB] == 's') {
1669                                 /* note: the copy is inserted before the current instruction! */
1670                                 insert_before_ins (ins, tmp, copy);
1671                                 /* we set sreg1 to dest as well */
1672                                 prev_sreg1 = ins->sreg1 = ins->dreg;
1673                         } else {
1674                                 /* inserted after the operation */
1675                                 copy->next = ins->next;
1676                                 ins->next = copy;
1677                         }
1678                 }
1679                 if (spec [MONO_INST_SRC2] != 'f' && ins->sreg2 >= MONO_MAX_IREGS) {
1680                         val = rs->iassign [ins->sreg2];
1681                         prev_sreg2 = ins->sreg2;
1682                         if (val < 0) {
1683                                 int spill = 0;
1684                                 if (val < -1) {
1685                                         /* the register gets spilled after this inst */
1686                                         spill = -val -1;
1687                                 }
1688                                 val = mono_regstate_alloc_int (rs, src2_mask);
1689                                 if (val < 0)
1690                                         val = get_register_spilling (cfg, tmp, ins, src2_mask, ins->sreg2);
1691                                 rs->iassign [ins->sreg2] = val;
1692                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
1693                                 if (spill)
1694                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
1695                         }
1696                         rs->isymbolic [val] = prev_sreg2;
1697                         ins->sreg2 = val;
1698                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
1699                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
1700                         }
1701                 } else {
1702                         prev_sreg2 = -1;
1703                 }
1704
1705                 if (spec [MONO_INST_CLOB] == 'c') {
1706                         int j, s;
1707                         guint32 clob_mask = X86_CALLEE_REGS;
1708                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
1709                                 s = 1 << j;
1710                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
1711                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
1712                                 }
1713                         }
1714                 }
1715                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
1716                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
1717                         mono_regstate_free_int (rs, ins->sreg1);
1718                 }
1719                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
1720                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
1721                         mono_regstate_free_int (rs, ins->sreg2);
1722                 }*/
1723                 
1724                 //DEBUG (print_ins (i, ins));
1725                 /* this may result from a insert_before call */
1726                 if (!tmp->next)
1727                         bb->code = tmp->data;
1728                 tmp = tmp->next;
1729         }
1730 }
1731
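     /*
      * emit_float_to_int:
      * Convert the value on top of the FP stack to a @size byte integer in
      * @dreg. The FPU control word is temporarily switched to round-toward-zero
      * (truncation) around the fist, then restored.
      */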
1732 static unsigned char*
1733 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
1734 {
1735         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
1736         x86_fnstcw_membase(code, X86_ESP, 0);
1737         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
1738         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
1739         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
1740         x86_fldcw_membase (code, X86_ESP, 2);
1741         if (size == 8) {
1742                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1743                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
1744                 x86_pop_reg (code, dreg);
1745                 /* FIXME: need the high register 
1746                  * x86_pop_reg (code, dreg_high);
1747                  */
1748         } else {
1749                 x86_push_reg (code, X86_EAX); // SP = SP - 4
1750                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
1751                 x86_pop_reg (code, dreg);
1752         }
1753         x86_fldcw_membase (code, X86_ESP, 0);
1754         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
1755
1756         if (size == 1)
1757                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
1758         else if (size == 2)
1759                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
1760         return code;
1761 }
1762
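     /*
      * mono_emit_stack_alloc:
      * Grow the stack by the byte count in tree->sreg1 (localloc). On Windows
      * the allocation is done one page at a time, touching each page, because
      * only the guard page below the committed area may be accessed. If
      * MONO_INST_INIT is set the new area is zeroed with rep stosl.
      */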
1763 static unsigned char*
1764 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1765 {
1766         int sreg = tree->sreg1;
1767 #ifdef PLATFORM_WIN32
1768         guint8* br[5];
1769
1770         /*
1771          * Under Windows:
1772          * If the requested stack size is larger than one page,
1773          * perform a stack-touch operation.
1774          */
1775         /*
1776          * Generate stack probe code.
1777          * Under Windows, it is necessary to allocate one page at a time,
1778          * "touching" stack after each successful sub-allocation. This is
1779          * because of the way stack growth is implemented - there is a
1780          * guard page before the lowest stack page that is currently committed.
1781          * Stack normally grows sequentially so OS traps access to the
1782          * guard page and commits more pages when needed.
1783          */
1784         x86_test_reg_imm (code, sreg, ~0xFFF);
1785         br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1786
1787         br[2] = code; /* loop */
1788         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1789         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1790         x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1791         x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1792         br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1793         x86_patch (br[3], br[2]);
1794         x86_test_reg_reg (code, sreg, sreg);
1795         br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1796         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1797
1798         br[1] = code; x86_jump8 (code, 0);
1799
1800         x86_patch (br[0], code);
1801         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1802         x86_patch (br[1], code);
1803         x86_patch (br[4], code);
1804 #else /* PLATFORM_WIN32 */
1805         x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1806 #endif
1807         if (tree->flags & MONO_INST_INIT) {
1808                 int offset = 0;
1809                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1810                         x86_push_reg (code, X86_EAX);
1811                         offset += 4;
1812                 }
1813                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1814                         x86_push_reg (code, X86_ECX);
1815                         offset += 4;
1816                 }
1817                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1818                         x86_push_reg (code, X86_EDI);
1819                         offset += 4;
1820                 }
1821                 
1822                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1823                 if (sreg != X86_ECX)
1824                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1825                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1826                                 
1827                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1828                 x86_cld (code);
1829                 x86_prefix (code, X86_REP_PREFIX);
1830                 x86_stosl (code);
1831                 
1832                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1833                         x86_pop_reg (code, X86_EDI);
1834                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1835                         x86_pop_reg (code, X86_ECX);
1836                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1837                         x86_pop_reg (code, X86_EAX);
1838         }
1839         return code;
1840 }
1841
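     /*
      * REAL_PRINT_REG: debugging helper that emits code to printf @text along
      * with the number and current value of @reg, preserving EAX, EDX and ECX
      * around the call.
      */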
1842 #define REAL_PRINT_REG(text,reg) \
1843 mono_assert (reg >= 0); \
1844 x86_push_reg (code, X86_EAX); \
1845 x86_push_reg (code, X86_EDX); \
1846 x86_push_reg (code, X86_ECX); \
1847 x86_push_reg (code, reg); \
1848 x86_push_imm (code, reg); \
1849 x86_push_imm (code, text " %d %p\n"); \
1850 x86_mov_reg_imm (code, X86_EAX, printf); \
1851 x86_call_reg (code, X86_EAX); \
1852 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
1853 x86_pop_reg (code, X86_ECX); \
1854 x86_pop_reg (code, X86_EDX); \
1855 x86_pop_reg (code, X86_EAX);
1856
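     /*
      * mono_arch_output_basic_block:
      * Emit native x86 code for every instruction in @bb, growing the native
      * code buffer as needed; each opcode is handled by one case of the big
      * switch below.
      */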
1857 void
1858 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1859 {
1860         MonoInst *ins;
1861         MonoCallInst *call;
1862         guint offset;
1863         guint8 *code = cfg->native_code + cfg->code_len;
1864         MonoInst *last_ins = NULL;
1865         guint last_offset = 0;
1866         int max_len, cpos;
1867
1868         if (cfg->opt & MONO_OPT_PEEPHOLE)
1869                 peephole_pass (cfg, bb);
1870
1871 #if 0
1872         /* 
1873          * various strategies to align BBs. Using real loop detection or simply
1874          * aligning every block leads to more consistent benchmark results,
1875          * but usually slows down the code.
1876          * We should do the alignment outside this function, or adjust
1877          * bb->native_offset as well, or the code is effectively slowed down!
1878          */
1879         /* align all blocks */
1880 //      if ((pad = (cfg->code_len & (align - 1)))) {
1881         /* poor man's loop start detection */
1882 //      if (bb->code && bb->in_count && bb->in_bb [0]->cil_code > bb->cil_code && (pad = (cfg->code_len & (align - 1)))) {
1883         /* consider real loop detection and nesting level */
1884 //      if (bb->loop_blocks && bb->nesting < 3 && (pad = (cfg->code_len & (align - 1)))) {
1885         /* consider real loop detection */
1886         if (bb->loop_blocks && (pad = (cfg->code_len & (align - 1)))) {
1887                 pad = align - pad;
1888                 x86_padding (code, pad);
1889                 cfg->code_len += pad;
1890                 bb->native_offset = cfg->code_len;
1891         }
1892 #endif
1893
1894         if (cfg->verbose_level > 2)
1895                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1896
1897         cpos = bb->max_offset;
1898
1899         if (mono_trace_coverage) {
1900                 MonoCoverageInfo *cov = mono_get_coverage_info (cfg->method);
1901                 g_assert (!mono_compile_aot);
1902                 cpos += 6;
1903
1904                 // fixme: make this work with inlining
1905                 g_assert_not_reached ();
1906                 //if (bb->cil_code)
1907                 //cov->data [bb->dfn].iloffset = bb->cil_code - cfg->cil_code;
1908                 /* this is not thread safe, but good enough */
1909                 /* fixme: how to handle overflows? */
1910                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1911         }
1912
1913         offset = code - cfg->native_code;
1914
1915         ins = bb->code;
1916         while (ins) {
1917                 offset = code - cfg->native_code;
1918
1919                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
1920
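                     /* grow the code buffer if the worst-case encoding of this ins might not fit */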
1921                 if (offset > (cfg->code_size - max_len - 16)) {
1922                         cfg->code_size *= 2;
1923                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1924                         code = cfg->native_code + offset;
1925                         mono_jit_stats.code_reallocs++;
1926                 }
1927
1928                 mono_debug_record_line_number (cfg, ins, offset);
1929
1930                 switch (ins->opcode) {
1931                 case OP_STOREI1_MEMBASE_IMM:
1932                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1933                         break;
1934                 case OP_STOREI2_MEMBASE_IMM:
1935                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1936                         break;
1937                 case OP_STORE_MEMBASE_IMM:
1938                 case OP_STOREI4_MEMBASE_IMM:
1939                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1940                         break;
1941                 case OP_STOREI1_MEMBASE_REG:
1942                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1943                         break;
1944                 case OP_STOREI2_MEMBASE_REG:
1945                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1946                         break;
1947                 case OP_STORE_MEMBASE_REG:
1948                 case OP_STOREI4_MEMBASE_REG:
1949                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1950                         break;
1951                 case CEE_LDIND_I:
1952                 case CEE_LDIND_I4:
1953                 case CEE_LDIND_U4:
1954                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1955                         break;
1956                 case OP_LOADU4_MEM:
1957                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1958                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1959                         break;
1960                 case OP_LOAD_MEMBASE:
1961                 case OP_LOADI4_MEMBASE:
1962                 case OP_LOADU4_MEMBASE:
1963                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1964                         break;
1965                 case OP_LOADU1_MEMBASE:
1966                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1967                         break;
1968                 case OP_LOADI1_MEMBASE:
1969                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1970                         break;
1971                 case OP_LOADU2_MEMBASE:
1972                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1973                         break;
1974                 case OP_LOADI2_MEMBASE:
1975                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1976                         break;
1977                 case CEE_CONV_I1:
1978                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1979                         break;
1980                 case CEE_CONV_I2:
1981                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1982                         break;
1983                 case CEE_CONV_U1:
1984                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1985                         break;
1986                 case CEE_CONV_U2:
1987                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1988                         break;
1989                 case OP_COMPARE:
1990                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1991                         break;
1992                 case OP_COMPARE_IMM:
1993                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1994                         break;
1995                 case OP_X86_COMPARE_MEMBASE_REG:
1996                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1997                         break;
1998                 case OP_X86_COMPARE_MEMBASE_IMM:
1999                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2000                         break;
2001                 case OP_X86_COMPARE_REG_MEMBASE:
2002                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2003                         break;
2004                 case OP_X86_TEST_NULL:
2005                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2006                         break;
2007                 case OP_X86_ADD_MEMBASE_IMM:
2008                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2009                         break;
2010                 case OP_X86_SUB_MEMBASE_IMM:
2011                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2012                         break;
2013                 case OP_X86_INC_MEMBASE:
2014                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2015                         break;
2016                 case OP_X86_INC_REG:
2017                         x86_inc_reg (code, ins->dreg);
2018                         break;
2019                 case OP_X86_DEC_MEMBASE:
2020                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2021                         break;
2022                 case OP_X86_DEC_REG:
2023                         x86_dec_reg (code, ins->dreg);
2024                         break;
2025                 case CEE_BREAK:
2026                         x86_breakpoint (code);
2027                         break;
2028                 case OP_ADDCC:
2029                 case CEE_ADD:
2030                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2031                         break;
2032                 case OP_ADC:
2033                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2034                         break;
2035                 case OP_ADD_IMM:
2036                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2037                         break;
2038                 case OP_ADC_IMM:
2039                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2040                         break;
2041                 case OP_SUBCC:
2042                 case CEE_SUB:
2043                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2044                         break;
2045                 case OP_SBB:
2046                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2047                         break;
2048                 case OP_SUB_IMM:
2049                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2050                         break;
2051                 case OP_SBB_IMM:
2052                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2053                         break;
2054                 case CEE_AND:
2055                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2056                         break;
2057                 case OP_AND_IMM:
2058                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2059                         break;
2060                 case CEE_DIV:
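                             /* sign-extend EAX into EDX:EAX, then signed divide by sreg2; the quotient is left in EAX */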
2061                         x86_cdq (code);
2062                         x86_div_reg (code, ins->sreg2, TRUE);
2063                         break;
2064                 case CEE_DIV_UN:
2065                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2066                         x86_div_reg (code, ins->sreg2, FALSE);
2067                         break;
2068                 case OP_DIV_IMM:
2069                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2070                         x86_cdq (code);
2071                         x86_div_reg (code, ins->sreg2, TRUE);
2072                         break;
2073                 case CEE_REM:
2074                         x86_cdq (code);
2075                         x86_div_reg (code, ins->sreg2, TRUE);
2076                         break;
2077                 case CEE_REM_UN:
2078                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2079                         x86_div_reg (code, ins->sreg2, FALSE);
2080                         break;
2081                 case OP_REM_IMM:
2082                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2083                         x86_cdq (code);
2084                         x86_div_reg (code, ins->sreg2, TRUE);
2085                         break;
2086                 case CEE_OR:
2087                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2088                         break;
2089                 case OP_OR_IMM:
2090                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2091                         break;
2092                 case CEE_XOR:
2093                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2094                         break;
2095                 case OP_XOR_IMM:
2096                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2097                         break;
2098                 case CEE_SHL:
2099                         g_assert (ins->sreg2 == X86_ECX);
2100                         x86_shift_reg (code, X86_SHL, ins->dreg);
2101                         break;
2102                 case CEE_SHR:
2103                         g_assert (ins->sreg2 == X86_ECX);
2104                         x86_shift_reg (code, X86_SAR, ins->dreg);
2105                         break;
2106                 case OP_SHR_IMM:
2107                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2108                         break;
2109                 case OP_SHR_UN_IMM:
2110                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2111                         break;
2112                 case CEE_SHR_UN:
2113                         g_assert (ins->sreg2 == X86_ECX);
2114                         x86_shift_reg (code, X86_SHR, ins->dreg);
2115                         break;
2116                 case OP_SHL_IMM:
2117                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2118                         break;
2119                 case CEE_NOT:
2120                         x86_not_reg (code, ins->sreg1);
2121                         break;
2122                 case CEE_NEG:
2123                         x86_neg_reg (code, ins->sreg1);
2124                         break;
2125                 case CEE_MUL:
2126                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2127                         break;
2128                 case OP_MUL_IMM:
2129                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2130                         break;
2131                 case CEE_MUL_OVF:
2132                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2133                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2134                         break;
2135                 case CEE_MUL_OVF_UN: {
2136                         /* the mul operation and the exception check should most likely be split */
2137                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2138                         /*g_assert (ins->sreg2 == X86_EAX);
2139                         g_assert (ins->dreg == X86_EAX);*/
2140                         if (ins->sreg2 == X86_EAX) {
2141                                 non_eax_reg = ins->sreg1;
2142                         } else if (ins->sreg1 == X86_EAX) {
2143                                 non_eax_reg = ins->sreg2;
2144                         } else {
2145                                 /* no need to save since we're going to store to it anyway */
2146                                 if (ins->dreg != X86_EAX) {
2147                                         saved_eax = TRUE;
2148                                         x86_push_reg (code, X86_EAX);
2149                                 }
2150                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2151                                 non_eax_reg = ins->sreg2;
2152                         }
2153                         if (ins->dreg == X86_EDX) {
2154                                 if (!saved_eax) {
2155                                         saved_eax = TRUE;
2156                                         x86_push_reg (code, X86_EAX);
2157                                 }
2158                         } else if (ins->dreg != X86_EAX) {
2159                                 saved_edx = TRUE;
2160                                 x86_push_reg (code, X86_EDX);
2161                         }
2162                         x86_mul_reg (code, non_eax_reg, FALSE);
2163                         /* restore before the check, since pop and mov don't change the flags */
2164                         if (saved_edx)
2165                                 x86_pop_reg (code, X86_EDX);
2166                         if (saved_eax)
2167                                 x86_pop_reg (code, X86_EAX);
2168                         if (ins->dreg != X86_EAX)
2169                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2170                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2171                         break;
2172                 }
2173                 case OP_ICONST:
2174                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2175                         break;
2176                 case OP_AOTCONST:
2177                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2178                         x86_mov_reg_imm (code, ins->dreg, 0);
2179                         break;
2180                 case CEE_CONV_I4:
2181                 case CEE_CONV_U4:
2182                 case OP_MOVE:
2183                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2184                         break;
2185                 case CEE_JMP: {
2186                         /*
2187                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2188                          */
2189                         int pos = -4;
2190                         if (cfg->used_int_regs & (1 << X86_EBX)) {
2191                                 x86_mov_reg_membase (code, X86_EBX, X86_EBP, pos, 4);
2192                                 pos -= 4;
2193                         }
2194                         if (cfg->used_int_regs & (1 << X86_EDI)) {
2195                                 x86_mov_reg_membase (code, X86_EDI, X86_EBP, pos, 4);
2196                                 pos -= 4;
2197                         }
2198                         if (cfg->used_int_regs & (1 << X86_ESI)) {
2199                                 x86_mov_reg_membase (code, X86_ESI, X86_EBP, pos, 4);
2200                                 pos -= 4;
2201                         }
2202                         /* restore ESP/EBP */
2203                         x86_leave (code);
2204                         offset = code - cfg->native_code;
2205                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, ins->inst_p0);
2206                         x86_jump32 (code, 0);
2207                         break;
2208                 }
2209                 case OP_CHECK_THIS:
2210                         /* ensure ins->sreg1 is not NULL */
2211                         x86_alu_membase_imm (code, X86_CMP, ins->sreg1, 0, 0);
2212                         break;
2213                 case OP_FCALL:
2214                 case OP_LCALL:
2215                 case OP_VCALL:
2216                 case OP_VOIDCALL:
2217                 case CEE_CALL:
2218                         call = (MonoCallInst*)ins;
2219                         if (ins->flags & MONO_INST_HAS_METHOD)
2220                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method);
2221                         else {
2222                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
2223                         }
2224                         x86_call_code (code, 0);
2225                         if (call->stack_usage)
2226                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2227                         break;
2228                 case OP_FCALL_REG:
2229                 case OP_LCALL_REG:
2230                 case OP_VCALL_REG:
2231                 case OP_VOIDCALL_REG:
2232                 case OP_CALL_REG:
2233                         call = (MonoCallInst*)ins;
2234                         x86_call_reg (code, ins->sreg1);
2235                         if (call->stack_usage)
2236                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2237                         break;
2238                 case OP_FCALL_MEMBASE:
2239                 case OP_LCALL_MEMBASE:
2240                 case OP_VCALL_MEMBASE:
2241                 case OP_VOIDCALL_MEMBASE:
2242                 case OP_CALL_MEMBASE:
2243                         call = (MonoCallInst*)ins;
2244                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2245                         if (call->stack_usage)
2246                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2247                         break;
2248                 case OP_OUTARG:
2249                 case OP_X86_PUSH:
2250                         x86_push_reg (code, ins->sreg1);
2251                         break;
2252                 case OP_X86_PUSH_IMM:
2253                         x86_push_imm (code, ins->inst_imm);
2254                         break;
2255                 case OP_X86_PUSH_MEMBASE:
2256                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2257                         break;
2258                 case OP_X86_PUSH_OBJ: 
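                             /*
                              * Push a value type: make room on the stack and copy
                              * inst_imm bytes from the source address with rep movsd,
                              * preserving EDI, ESI and ECX around the copy.
                              */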
2259                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2260                         x86_push_reg (code, X86_EDI);
2261                         x86_push_reg (code, X86_ESI);
2262                         x86_push_reg (code, X86_ECX);
2263                         if (ins->inst_offset)
2264                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2265                         else
2266                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2267                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2268                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2269                         x86_cld (code);
2270                         x86_prefix (code, X86_REP_PREFIX);
2271                         x86_movsd (code);
2272                         x86_pop_reg (code, X86_ECX);
2273                         x86_pop_reg (code, X86_ESI);
2274                         x86_pop_reg (code, X86_EDI);
2275                         break;
2276                 case OP_X86_LEA:
2277                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2278                         break;
2279                 case OP_X86_XCHG:
2280                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2281                         break;
2282                 case OP_LOCALLOC:
2283                         /* keep alignment */
2284                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2285                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2286                         code = mono_emit_stack_alloc (code, ins);
2287                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2288                         break;
2289                 case CEE_RET:
2290                         x86_ret (code);
2291                         break;
2292                 case CEE_THROW: {
2293                         x86_push_reg (code, ins->sreg1);
2294                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2295                                              (gpointer)"mono_arch_throw_exception");
2296                         x86_call_code (code, 0);
2297                         break;
2298                 }
2299                 case OP_ENDFILTER:
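                             /*
                              * The filter result is returned in EAX; restore the ESP
                              * value saved by OP_HANDLER, adjust it so ret finds the
                              * return address pushed by the handler call, and return.
                              */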
2300                         if (ins->sreg1 != X86_EAX)
2301                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2302                         x86_mov_reg_membase (code, X86_ESP, X86_EBP, mono_exc_esp_offset, 4);
2303                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2304                         x86_ret (code);
2305                         break;
2306                 case CEE_ENDFINALLY:
2307                         /* 
2308                          * restore ESP - which can be modified when we allocate value types in the filter
2309                          */
2310                         x86_mov_reg_membase (code, X86_ESP, X86_EBP, mono_exc_esp_offset, 4);
2311                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2312                         x86_ret (code);
2313                         break;
2314                 case OP_HANDLER: 
2315                         x86_mov_membase_reg (code, X86_EBP, mono_exc_esp_offset, X86_ESP, 4);
2316                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2317                         x86_call_imm (code, 0);
2318                         break;
2319                 case OP_LABEL:
2320                         ins->inst_c0 = code - cfg->native_code;
2321                         break;
2322                 case CEE_BR:
2323                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2324                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2325                         //break;
2326                         if (ins->flags & MONO_INST_BRLABEL) {
2327                                 if (ins->inst_i0->inst_c0) {
2328                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2329                                 } else {
2330                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2331                                         x86_jump32 (code, 0);
2332                                 }
2333                         } else {
2334                                 if (ins->inst_target_bb->native_offset) {
2335                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2336                                 } else {
2337                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2338                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2339                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2340                                                 x86_jump8 (code, 0);
2341                                         else 
2342                                                 x86_jump32 (code, 0);
2343                                 } 
2344                         }
2345                         break;
2346                 case OP_BR_REG:
2347                         x86_jump_reg (code, ins->sreg1);
2348                         break;
2349                 case OP_CEQ:
2350                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2351                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2352                         break;
2353                 case OP_CLT:
2354                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2355                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2356                         break;
2357                 case OP_CLT_UN:
2358                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2359                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2360                         break;
2361                 case OP_CGT:
2362                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2363                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2364                         break;
2365                 case OP_CGT_UN:
2366                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2367                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2368                         break;
2369                 case OP_COND_EXC_EQ:
2370                 case OP_COND_EXC_NE_UN:
2371                 case OP_COND_EXC_LT:
2372                 case OP_COND_EXC_LT_UN:
2373                 case OP_COND_EXC_GT:
2374                 case OP_COND_EXC_GT_UN:
2375                 case OP_COND_EXC_GE:
2376                 case OP_COND_EXC_GE_UN:
2377                 case OP_COND_EXC_LE:
2378                 case OP_COND_EXC_LE_UN:
2379                 case OP_COND_EXC_OV:
2380                 case OP_COND_EXC_NO:
2381                 case OP_COND_EXC_C:
2382                 case OP_COND_EXC_NC:
2383                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
2384                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2385                         break;
2386                 case CEE_BEQ:
2387                 case CEE_BNE_UN:
2388                 case CEE_BLT:
2389                 case CEE_BLT_UN:
2390                 case CEE_BGT:
2391                 case CEE_BGT_UN:
2392                 case CEE_BGE:
2393                 case CEE_BGE_UN:
2394                 case CEE_BLE:
2395                 case CEE_BLE_UN:
2396                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2397                         break;
2398
2399                 /* floating point opcodes */
2400                 case OP_R8CONST: {
2401                         double d = *(double *)ins->inst_p0;
2402
2403                         if (d == 0.0) {
2404                                 x86_fldz (code);
2405                         } else if (d == 1.0) {
2406                                 x86_fld1 (code);
2407                         } else {
2408                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
2409                                 x86_fld (code, NULL, TRUE);
2410                         }
2411                         break;
2412                 }
2413                 case OP_R4CONST: {
2414                         float f = *(float *)ins->inst_p0;
2415
2416                         if (f == 0.0) {
2417                                 x86_fldz (code);
2418                         } else if (f == 1.0) {
2419                                 x86_fld1 (code);
2420                         } else {
2421                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
2422                                 x86_fld (code, NULL, FALSE);
2423                         }
2424                         break;
2425                 }
2426                 case OP_STORER8_MEMBASE_REG:
2427                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2428                         break;
2429                 case OP_LOADR8_MEMBASE:
2430                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2431                         break;
2432                 case OP_STORER4_MEMBASE_REG:
2433                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2434                         break;
2435                 case OP_LOADR4_MEMBASE:
2436                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2437                         break;
2438                 case CEE_CONV_R4: /* FIXME: change precision */
2439                 case CEE_CONV_R8:
2440                         x86_push_reg (code, ins->sreg1);
2441                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2442                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2443                         break;
2444                 case OP_X86_FP_LOAD_I8:
2445                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2446                         break;
2447                 case OP_X86_FP_LOAD_I4:
2448                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2449                         break;
2450                 case OP_FCONV_TO_I1:
2451                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2452                         break;
2453                 case OP_FCONV_TO_U1:
2454                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2455                         break;
2456                 case OP_FCONV_TO_I2:
2457                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2458                         break;
2459                 case OP_FCONV_TO_U2:
2460                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2461                         break;
2462                 case OP_FCONV_TO_I4:
2463                 case OP_FCONV_TO_I:
2464                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2465                         break;
2466                 case OP_FCONV_TO_I8:
2467                         /* we defined this instruction to output only to eax:edx */
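                        /* save the FPU control word, set the rounding control bits to 11
                         * (truncate toward zero, OR 0xc00), convert with fistp into the 8 bytes
                         * reserved on the stack, pop the result into eax:edx and restore the
                         * original control word */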
2468                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2469                         x86_fnstcw_membase(code, X86_ESP, 0);
2470                         x86_mov_reg_membase (code, X86_EAX, X86_ESP, 0, 2);
2471                         x86_alu_reg_imm (code, X86_OR, X86_EAX, 0xc00);
2472                         x86_mov_membase_reg (code, X86_ESP, 2, X86_EAX, 2);
2473                         x86_fldcw_membase (code, X86_ESP, 2);
2474                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2475                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2476                         x86_pop_reg (code, X86_EAX);
2477                         x86_pop_reg (code, X86_EDX);
2478                         x86_fldcw_membase (code, X86_ESP, 0);
2479                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2480                         break;
2481                 case OP_LCONV_TO_R_UN: { 
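                        /* mn is 2^64 encoded as an 80-bit extended-precision value; it is added
                         * below when the sign bit of the 64-bit value is set, correcting the
                         * signed fild result to the unsigned value */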
2482                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2483                         guint8 *br;
2484
2485                         /* load 64bit integer to FP stack */
2486                         x86_push_imm (code, 0);
2487                         x86_push_reg (code, ins->sreg2);
2488                         x86_push_reg (code, ins->sreg1);
2489                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2490                         /* store as 80bit FP value */
2491                         x86_fst80_membase (code, X86_ESP, 0);
2492                         
2493                         /* test if lreg is negative */
2494                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2495                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2496         
2497                         /* add correction constant mn */
2498                         x86_fld80_mem (code, mn);
2499                         x86_fld80_membase (code, X86_ESP, 0);
2500                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2501                         x86_fst80_membase (code, X86_ESP, 0);
2502
2503                         x86_patch (br, code);
2504
2505                         x86_fld80_membase (code, X86_ESP, 0);
2506                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2507
2508                         break;
2509                 }
2510                 case OP_LCONV_TO_OVF_I: {
2511                         guint8 *br [3], *label [1];
2512
2513                         /* 
2514                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2515                          */
2516                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2517
2518                         /* If the low word top bit is set, see if we are negative */
2519                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2520                         /* We are not negative (no top bit set); check that our top word is zero */
2521                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2522                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2523                         label [0] = code;
2524
2525                         /* throw exception */
2526                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2527                         x86_jump32 (code, 0);
2528         
2529                         x86_patch (br [0], code);
2530                         /* our top bit is set, check that the top word is 0xffffffff */
2531                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2532                 
2533                         x86_patch (br [1], code);
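                        /* if we branched here from the EQ test above, ZF is still set and the
                         * NE branch below falls through */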
2534                         /* otherwise, if the top word is not 0xffffffff, branch back to the exception throw */
2535                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2536                         x86_patch (br [2], label [0]);
2537
2538                         if (ins->dreg != ins->sreg1)
2539                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2540                         break;
2541                 }
2542                 case OP_FADD:
2543                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2544                         break;
2545                 case OP_FSUB:
2546                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2547                         break;          
2548                 case OP_FMUL:
2549                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2550                         break;          
2551                 case OP_FDIV:
2552                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2553                         break;          
2554                 case OP_FNEG:
2555                         x86_fchs (code);
2556                         break;          
2557                 case OP_SIN:
2558                         x86_fsin (code);
2559                         break;          
2560                 case OP_COS:
2561                         x86_fcos (code);
2562                         break;          
2563                 case OP_ABS:
2564                         x86_fabs (code);
2565                         break;          
2566                 case OP_TAN:
2567                         x86_fptan (code);
2568                         break;          
2569                 case OP_ATAN:
2570                         x86_fpatan (code);
2571                         break;          
2572                 case OP_SQRT:
2573                         x86_fsqrt (code);
2574                         break;          
2575                 case OP_X86_FPOP:
2576                         x86_fstp (code, 0);
2577                         break;          
2578                 case OP_FREM: {
2579                         guint8 *l1, *l2;
2580
2581                         x86_push_reg (code, X86_EAX);
2582                         /* we need to exchange ST(0) with ST(1) */
2583                         x86_fxch (code, 1);
2584
2585                         /* this requires a loop, because fprem sometimes
2586                          * returns only a partial remainder */
2587                         l1 = code;
2588                         x86_fprem (code);
2589                         x86_fnstsw (code);
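                        /* 0x0400 is the C2 flag of the FPU status word; fprem sets it when
                         * only a partial reduction was performed, so we loop until it is clear */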
2590                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x0400);
2591                         l2 = code + 2;
2592                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2593
2594                         /* pop result */
2595                         x86_fstp (code, 1);
2596
2597                         x86_pop_reg (code, X86_EAX);
2598                         break;
2599                 }
2600                 case OP_FCOMPARE:
2601                         if (cfg->opt & MONO_OPT_FCMOV) {
2602                                 x86_fcomip (code, 1);
2603                                 x86_fstp (code, 0);
2604                                 break;
2605                         }
2606                         /* this overwrites EAX */
2607                         EMIT_FPCOMPARE(code);
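                        /* 0x4500 masks the FPU condition flags C0 (0x0100), C2 (0x0400) and
                         * C3 (0x4000): C3 alone means equal, all three set means unordered (NaN) */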
2608                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2609                         break;
2610                 case OP_FCEQ:
2611                         if (cfg->opt & MONO_OPT_FCMOV) {
2612                                 /* zeroing the register at the start results in 
2613                                  * shorter and faster code (we can also remove the widening op)
2614                                  */
2615                                 guchar *unordered_check;
2616                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2617                                 x86_fcomip (code, 1);
2618                                 x86_fstp (code, 0);
2619                                 unordered_check = code;
2620                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2621                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2622                                 x86_patch (unordered_check, code);
2623                                 break;
2624                         }
2625                         if (ins->dreg != X86_EAX) 
2626                                 x86_push_reg (code, X86_EAX);
2627
2628                         EMIT_FPCOMPARE(code);
2629                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2630                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2631                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2632                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2633
2634                         if (ins->dreg != X86_EAX) 
2635                                 x86_pop_reg (code, X86_EAX);
2636                         break;
2637                 case OP_FCLT:
2638                 case OP_FCLT_UN:
2639                         if (cfg->opt & MONO_OPT_FCMOV) {
2640                                 /* zeroing the register at the start results in 
2641                                  * shorter and faster code (we can also remove the widening op)
2642                                  */
2643                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2644                                 x86_fcomip (code, 1);
2645                                 x86_fstp (code, 0);
2646                                 if (ins->opcode == OP_FCLT_UN) {
2647                                         guchar *unordered_check = code;
2648                                         guchar *jump_to_end;
2649                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2650                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2651                                         jump_to_end = code;
2652                                         x86_jump8 (code, 0);
2653                                         x86_patch (unordered_check, code);
2654                                         x86_inc_reg (code, ins->dreg);
2655                                         x86_patch (jump_to_end, code);
2656                                 } else {
2657                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2658                                 }
2659                                 break;
2660                         }
2661                         if (ins->dreg != X86_EAX) 
2662                                 x86_push_reg (code, X86_EAX);
2663
2664                         EMIT_FPCOMPARE(code);
2665                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2666                         if (ins->opcode == OP_FCLT_UN) {
2667                                 guchar *is_not_zero_check, *end_jump;
2668                                 is_not_zero_check = code;
2669                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2670                                 end_jump = code;
2671                                 x86_jump8 (code, 0);
2672                                 x86_patch (is_not_zero_check, code);
2673                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2674
2675                                 x86_patch (end_jump, code);
2676                         }
2677                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2678                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2679
2680                         if (ins->dreg != X86_EAX) 
2681                                 x86_pop_reg (code, X86_EAX);
2682                         break;
2683                 case OP_FCGT:
2684                 case OP_FCGT_UN:
2685                         if (cfg->opt & MONO_OPT_FCMOV) {
2686                                 /* zeroing the register at the start results in 
2687                                  * shorter and faster code (we can also remove the widening op)
2688                                  */
2689                                 guchar *unordered_check;
2690                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2691                                 x86_fcomip (code, 1);
2692                                 x86_fstp (code, 0);
2693                                 if (ins->opcode == OP_FCGT) {
2694                                         unordered_check = code;
2695                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2696                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2697                                         x86_patch (unordered_check, code);
2698                                 } else {
2699                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2700                                 }
2701                                 break;
2702                         }
2703                         if (ins->dreg != X86_EAX) 
2704                                 x86_push_reg (code, X86_EAX);
2705
2706                         EMIT_FPCOMPARE(code);
2707                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4500);
2708                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2709                         if (ins->opcode == OP_FCGT_UN) {
2710                                 guchar *is_not_zero_check, *end_jump;
2711                                 is_not_zero_check = code;
2712                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2713                                 end_jump = code;
2714                                 x86_jump8 (code, 0);
2715                                 x86_patch (is_not_zero_check, code);
2716                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2717
2718                                 x86_patch (end_jump, code);
2719                         }
2720                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2721                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2722
2723                         if (ins->dreg != X86_EAX) 
2724                                 x86_pop_reg (code, X86_EAX);
2725                         break;
2726                 case OP_FBEQ:
2727                         if (cfg->opt & MONO_OPT_FCMOV) {
2728                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2729                                 break;
2730                         }
2731                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2732                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2733                         break;
2734                 case OP_FBNE_UN:
2735                         if (cfg->opt & MONO_OPT_FCMOV) {
2736                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2737                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2738                                 break;
2739                         }
2740                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2741                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2742                         break;
2743                 case OP_FBLT:
2744                         if (cfg->opt & MONO_OPT_FCMOV) {
2745                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2746                                 break;
2747                         }
2748                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2749                         break;
2750                 case OP_FBLT_UN:
2751                         if (cfg->opt & MONO_OPT_FCMOV) {
2752                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2753                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2754                                 break;
2755                         }
2756                         if (ins->opcode == OP_FBLT_UN) {
2757                                 guchar *is_not_zero_check, *end_jump;
2758                                 is_not_zero_check = code;
2759                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2760                                 end_jump = code;
2761                                 x86_jump8 (code, 0);
2762                                 x86_patch (is_not_zero_check, code);
2763                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2764
2765                                 x86_patch (end_jump, code);
2766                         }
2767                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2768                         break;
2769                 case OP_FBGT:
2770                 case OP_FBGT_UN:
2771                         if (cfg->opt & MONO_OPT_FCMOV) {
2772                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2773                                 break;
2774                         }
2775                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2776                         if (ins->opcode == OP_FBGT_UN) {
2777                                 guchar *is_not_zero_check, *end_jump;
2778                                 is_not_zero_check = code;
2779                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2780                                 end_jump = code;
2781                                 x86_jump8 (code, 0);
2782                                 x86_patch (is_not_zero_check, code);
2783                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4500);
2784
2785                                 x86_patch (end_jump, code);
2786                         }
2787                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2788                         break;
2789                 case OP_FBGE:
2790                 case OP_FBGE_UN:
2791                         if (cfg->opt & MONO_OPT_FCMOV) {
2792                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
2793                                 break;
2794                         }
2795                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2796                         break;
2797                 case OP_FBLE:
2798                 case OP_FBLE_UN:
2799                         if (cfg->opt & MONO_OPT_FCMOV) {
2800                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2801                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
2802                                 break;
2803                         }
2804                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2805                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2806                         break;
2807                 case CEE_CKFINITE: {
2808                         x86_push_reg (code, X86_EAX);
2809                         x86_fxam (code);
2810                         x86_fnstsw (code);
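                        /* fxam classifies ST(0); after masking C3|C0 (0x4100), a value of 0x0100
                         * identifies NaN and infinity, so the EQ check below raises the exception
                         * for non-finite values */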
2811                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
2812                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x0100);
2813                         x86_pop_reg (code, X86_EAX);
2814                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
2815                         break;
2816                 }
2817                 default:
2818                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
2819                         g_assert_not_reached ();
2820                 }
2821
2822                 if ((code - cfg->native_code - offset) > max_len) {
2823                         g_warning ("wrong maximum instruction length for instruction %s (expected %d, got %d)",
2824                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
2825                         g_assert_not_reached ();
2826                 }
2827                
2828                 cpos += max_len;
2829
2830                 last_ins = ins;
2831                 last_offset = offset;
2832                 
2833                 ins = ins->next;
2834         }
2835
2836         cfg->code_len = code - cfg->native_code;
2837 }
2838
2839 void
2840 mono_arch_register_lowlevel_calls (void)
2841 {
2842         mono_register_jit_icall (enter_method, "mono_enter_method", NULL, TRUE);
2843         mono_register_jit_icall (leave_method, "mono_leave_method", NULL, TRUE);
2844 }
2845
2846 void
2847 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji)
2848 {
2849         MonoJumpInfo *patch_info;
2850
2851         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
2852                 unsigned char *ip = patch_info->ip.i + code;
2853                 const unsigned char *target = NULL;
2854
2855                 switch (patch_info->type) {
2856                 case MONO_PATCH_INFO_BB:
2857                         target = patch_info->data.bb->native_offset + code;
2858                         break;
2859                 case MONO_PATCH_INFO_ABS:
2860                         target = patch_info->data.target;
2861                         break;
2862                 case MONO_PATCH_INFO_LABEL:
2863                         target = patch_info->data.inst->inst_c0 + code;
2864                         break;
2865                 case MONO_PATCH_INFO_IP:
2866                         *((gpointer *)(ip)) = ip;
2867                         continue;
2868                 case MONO_PATCH_INFO_INTERNAL_METHOD: {
2869                         MonoJitICallInfo *mi = mono_find_jit_icall_by_name (patch_info->data.name);
2870                         if (!mi) {
2871                                 g_warning ("unknown MONO_PATCH_INFO_INTERNAL_METHOD %s", patch_info->data.name);
2872                                 g_assert_not_reached ();
2873                         }
2874                         target = mi->wrapper;
2875                         break;
2876                 }
2877                 case MONO_PATCH_INFO_METHOD:
2878                         if (patch_info->data.method == method) {
2879                                 target = code;
2880                         } else {
2881                                 /* get the trampoline to the method from the domain */
2882                                 target = mono_arch_create_jit_trampoline (patch_info->data.method);
2883                         }
2884                         break;
2885                 case MONO_PATCH_INFO_SWITCH: {
2886                         gpointer *table = (gpointer *)patch_info->data.target;
2887                         int i;
2888
2889                         *((gconstpointer *)(ip + 2)) = patch_info->data.target;
2890
2891                         for (i = 0; i < patch_info->table_size; i++) {
2892                                 table [i] = (int)patch_info->data.table [i] + code;
2893                         }
2894                         /* we put the absolute addresses into the table, so there is no need for x86_patch in this case */
2895                         continue;
2896                 }
2897                 case MONO_PATCH_INFO_METHODCONST:
2898                 case MONO_PATCH_INFO_CLASS:
2899                 case MONO_PATCH_INFO_IMAGE:
2900                 case MONO_PATCH_INFO_FIELD:
2901                         *((gconstpointer *)(ip + 1)) = patch_info->data.target;
2902                         continue;
2903                 case MONO_PATCH_INFO_R4:
2904                 case MONO_PATCH_INFO_R8:
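                        /* the fld emitted for these constants is two opcode/ModRM bytes followed
                         * by a 32-bit absolute address, so the operand to patch starts at ip + 2 */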
2905                         *((gconstpointer *)(ip + 2)) = patch_info->data.target;
2906                         continue;
2907                 default:
2908                         g_assert_not_reached ();
2909                 }
2910                 x86_patch (ip, target);
2911         }
2912 }
2913
2914 int
2915 mono_arch_max_epilog_size (MonoCompile *cfg)
2916 {
2917         int exc_count = 0, max_epilog_size = 16;
2918         MonoJumpInfo *patch_info;
2919         
2920         if (cfg->method->save_lmf)
2921                 max_epilog_size += 128;
2922         
2923         if (mono_jit_trace_calls)
2924                 max_epilog_size += 50;
2925
2926         if (mono_jit_profile)
2927                 max_epilog_size += 50;
2928
2929         /* count the number of exception infos */
2930      
2931         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
2932                 if (patch_info->type == MONO_PATCH_INFO_EXC)
2933                         exc_count++;
2934         }
2935
2936         /* 
2937          * make sure we have enough space for exceptions
2938          * 16 is the size of two push_imm instructions and a call
2939          */
2940         max_epilog_size += exc_count*16;
2941
2942         return max_epilog_size;
2943 }
2944
2945 guint8 *
2946 mono_arch_emit_prolog (MonoCompile *cfg)
2947 {
2948         MonoMethod *method = cfg->method;
2949         MonoBasicBlock *bb;
2950         MonoMethodSignature *sig;
2951         MonoInst *inst;
2952         int alloc_size, pos, max_offset, i;
2953         guint8 *code;
2954
2955         cfg->code_size = 256;
2956         code = cfg->native_code = g_malloc (cfg->code_size);
2957
2958         x86_push_reg (code, X86_EBP);
2959         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
2960
2961         alloc_size = - cfg->stack_offset;
2962         pos = 0;
2963
2964         if (method->save_lmf) {
2965                 pos += sizeof (MonoLMF);
2966                 
2967                 /* save the current IP */
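                /* the push imm32 below is a one-byte opcode followed by the immediate,
                 * so the IP patch target is code + 1 */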
2968                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
2969                 x86_push_imm (code, 0);
2970
2971                 /* save all caller saved regs */
2972                 x86_push_reg (code, X86_EBX);
2973                 x86_push_reg (code, X86_EDI);
2974                 x86_push_reg (code, X86_ESI);
2975                 x86_push_reg (code, X86_EBP);
2976
2977                 /* save method info */
2978                 x86_push_imm (code, method);
2979         
2980                 /* get the address of lmf for the current thread */
2981                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2982                                      (gpointer)"mono_get_lmf_addr");
2983                 x86_call_code (code, 0);
2984
2985                 /* push lmf */
2986                 x86_push_reg (code, X86_EAX); 
2987                 /* push *lmf (previous_lmf) */
2988                 x86_push_membase (code, X86_EAX, 0);
2989                 /* *(lmf) = ESP */
2990                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
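                /* together with the previous_lmf and lmf address pushed above, this links the
                 * new frame into the per-thread LMF chain; the epilog pops those two values
                 * and restores *lmf to unlink it */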
2991         } else {
2992
2993                 if (cfg->used_int_regs & (1 << X86_EBX)) {
2994                         x86_push_reg (code, X86_EBX);
2995                         pos += 4;
2996                 }
2997
2998                 if (cfg->used_int_regs & (1 << X86_EDI)) {
2999                         x86_push_reg (code, X86_EDI);
3000                         pos += 4;
3001                 }
3002
3003                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3004                         x86_push_reg (code, X86_ESI);
3005                         pos += 4;
3006                 }
3007         }
3008
3009         alloc_size -= pos;
3010
3011         if (alloc_size)
3012                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3013
3014         /* compute max_offset in order to use short forward jumps */
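        /* max_offset is a conservative upper bound on each basic block's native offset,
         * computed from the worst-case length of every instruction; CEE_BR uses it to
         * decide when an 8-bit branch displacement is safe */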
3015         max_offset = 0;
3016         if (cfg->opt & MONO_OPT_BRANCH) {
3017                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3018                         MonoInst *ins = bb->code;
3019                         bb->max_offset = max_offset;
3020
3021                         if (mono_trace_coverage)
3022                                 max_offset += 6; 
3023
3024                         while (ins) {
3025                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3026                                 ins = ins->next;
3027                         }
3028                 }
3029         }
3030
3031         if (mono_jit_trace_calls)
3032                 code = mono_arch_instrument_prolog (cfg, enter_method, code, TRUE);
3033
3034         /* load arguments allocated to register from the stack */
3035         sig = method->signature;
3036         pos = 0;
3037
3038         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3039                 inst = cfg->varinfo [pos];
3040                 if (inst->opcode == OP_REGVAR) {
3041                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3042                         if (cfg->verbose_level > 2)
3043                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3044                 }
3045                 pos++;
3046         }
3047
3048         cfg->code_len = code - cfg->native_code;
3049
3050         return code;
3051 }
3052
3053 void
3054 mono_arch_emit_epilog (MonoCompile *cfg)
3055 {
3056         MonoJumpInfo *patch_info;
3057         MonoMethod *method = cfg->method;
3058         int pos;
3059         guint8 *code;
3060
3061         code = cfg->native_code + cfg->code_len;
3062
3063         if (mono_jit_trace_calls)
3064                 code = mono_arch_instrument_epilog (cfg, leave_method, code, TRUE);
3065
3066         
3067         pos = 0;
3068         
3069         if (method->save_lmf) {
3070                 pos = -sizeof (MonoLMF);
3071         } else {
3072                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3073                         pos -= 4;
3074                 }
3075                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3076                         pos -= 4;
3077                 }
3078                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3079                         pos -= 4;
3080                 }
3081         }
3082
3083         if (pos)
3084                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3085         
3086         if (method->save_lmf) {
3087                 /* ebx = previous_lmf */
3088                 x86_pop_reg (code, X86_EBX);
3089                 /* edi = lmf */
3090                 x86_pop_reg (code, X86_EDI);
3091                 /* *(lmf) = previous_lmf */
3092                 x86_mov_membase_reg (code, X86_EDI, 0, X86_EBX, 4);
3093
3094                 /* discard method info */
3095                 x86_pop_reg (code, X86_ESI);
3096
3097                 /* restore caller saved regs */
3098                 x86_pop_reg (code, X86_EBP);
3099                 x86_pop_reg (code, X86_ESI);
3100                 x86_pop_reg (code, X86_EDI);
3101                 x86_pop_reg (code, X86_EBX);
3102
3103         } else {
3104
3105                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3106                         x86_pop_reg (code, X86_ESI);
3107                 }
3108                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3109                         x86_pop_reg (code, X86_EDI);
3110                 }
3111                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3112                         x86_pop_reg (code, X86_EBX);
3113                 }
3114         }
3115
3116         x86_leave (code);
3117         x86_ret (code);
3118
3119         /* add code to raise exceptions */
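        /* each MONO_PATCH_INFO_EXC branch is patched to land here; we push the exception
         * name and the faulting IP, then turn the patch into an INTERNAL_METHOD patch so
         * the jump below is later wired to mono_arch_throw_exception_by_name */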
3120         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3121                 switch (patch_info->type) {
3122                 case MONO_PATCH_INFO_EXC:
3123                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3124                         x86_push_imm (code, patch_info->data.target);
3125                         x86_push_imm (code, patch_info->ip.i + cfg->native_code);
3126                         patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
3127                         patch_info->data.name = "mono_arch_throw_exception_by_name";
3128                         patch_info->ip.i = code - cfg->native_code;
3129                         x86_jump_code (code, 0);
3130                         break;
3131                 default:
3132                         /* do nothing */
3133                         break;
3134                 }
3135         }
3136
3137         cfg->code_len = code - cfg->native_code;
3138
3139         g_assert (cfg->code_len < cfg->code_size);
3140
3141 }