2005-03-20 Zoltan Varga <vargaz@freemail.hu>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #include <mono/metadata/appdomain.h>
16 #include <mono/metadata/debug-helpers.h>
17 #include <mono/metadata/threads.h>
18 #include <mono/metadata/profiler-private.h>
19 #include <mono/utils/mono-math.h>
20
21 #include "trace.h"
22 #include "mini-x86.h"
23 #include "inssel.h"
24 #include "cpu-pentium.h"
25
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

/* Round VAL up to the next multiple of ALIGN (ALIGN must be a power of two) */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

/* NOTE(review): presumably the size of the alternate signal stack used for
 * exception handling — confirm against the signal setup code */
#define SIGNAL_STACK_SIZE (64 * 1024)

#define NOT_IMPLEMENTED g_assert_not_reached ()
43
44 const char*
45 mono_arch_regname (int reg) {
46         switch (reg) {
47         case X86_EAX: return "%eax";
48         case X86_EBX: return "%ebx";
49         case X86_ECX: return "%ecx";
50         case X86_EDX: return "%edx";
51         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
52         case X86_EDI: return "%edi";
53         case X86_ESI: return "%esi";
54         }
55         return "unknown";
56 }
57
/* Where an argument or return value lives */
typedef enum {
	ArgInIReg,		/* in an integer register */
	ArgInFloatSSEReg,	/* in an SSE register, as a float */
	ArgInDoubleSSEReg,	/* in an SSE register, as a double */
	ArgOnStack,		/* on the stack */
	ArgValuetypeInReg,	/* valuetype split across two slots, see ArgInfo.pair_storage */
	ArgOnFloatFpStack,	/* on the x87 fp stack, as a float */
	ArgOnDoubleFpStack,	/* on the x87 fp stack, as a double */
	ArgNone			/* no storage needed (void return) */
} ArgStorage;
68
/* Location of a single argument or return value */
typedef struct {
	gint16 offset;		/* stack offset, meaningful when storage == ArgOnStack */
	gint8  reg;		/* register number, meaningful for in-register storages */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;
78
/*
 * Calling-convention information for one signature, produced by
 * get_call_info () and freed with g_free ().
 */
typedef struct {
	int nargs;
	guint32 stack_usage;	/* total stack space taken by the arguments */
	guint32 reg_usage;	/* number of integer registers used */
	guint32 freg_usage;	/* number of float registers used */
	gboolean need_stack_align;
	ArgInfo ret;
	ArgInfo sig_cookie;	/* location of the vararg signature cookie */
	/* Variable length: allocated with one entry per actual argument
	 * ('this' included); [1] is the pre-C99 trailing-array idiom */
	ArgInfo args [1];
} CallInfo;
89
/* No integer or float arguments are passed in registers by the default
 * x86 calling conventions — everything goes on the stack */
#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

/* NOTE(review): the empty-brace initializer is a GNU extension; the array
 * is never actually indexed since PARAM_REGS is 0 */
static X86_Reg_No param_regs [] = { };

#ifdef PLATFORM_WIN32
/* Register pair used to return small valuetypes from pinvoke calls */
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
99
100 static void inline
101 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
102 {
103     ainfo->offset = *stack_size;
104
105     if (*gr >= PARAM_REGS) {
106                 ainfo->storage = ArgOnStack;
107                 (*stack_size) += sizeof (gpointer);
108     }
109     else {
110                 ainfo->storage = ArgInIReg;
111                 ainfo->reg = param_regs [*gr];
112                 (*gr) ++;
113     }
114 }
115
116 static void inline
117 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
118 {
119         ainfo->offset = *stack_size;
120
121         g_assert (PARAM_REGS == 0);
122         
123         ainfo->storage = ArgOnStack;
124         (*stack_size) += sizeof (gpointer) * 2;
125 }
126
127 static void inline
128 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
129 {
130     ainfo->offset = *stack_size;
131
132     if (*gr >= FLOAT_PARAM_REGS) {
133                 ainfo->storage = ArgOnStack;
134                 (*stack_size) += sizeof (gpointer);
135     }
136     else {
137                 /* A double register */
138                 if (is_double)
139                         ainfo->storage = ArgInDoubleSSEReg;
140                 else
141                         ainfo->storage = ArgInFloatSSEReg;
142                 ainfo->reg = *gr;
143                 (*gr) += 1;
144     }
145 }
146
147
/*
 * add_valuetype:
 *
 *   Compute the ArgInfo for a valuetype argument or return value of type TYPE.
 * Valuetypes normally go on the stack; on win32, small pinvoke return values
 * are passed back in registers or on the fp stack instead.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* Managed and native layouts can differ, so pick the matching size */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* Power-of-two sized structs up to 8 bytes come back in EAX (and EDX) */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* Default: pass on the stack, rounded up to a whole number of slots */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
203
/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386 
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 */
static CallInfo*
get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	/* CallInfo already embeds one ArgInfo, so this over-allocates by one
	 * entry; zeroed so untouched fields read as 0/ArgInIReg */
	cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mono_type_get_underlying_type (sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			/* NOTE(review): only EAX is recorded here although 64 bit
			 * values also use EDX — presumably the callers know this;
			 * confirm */
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			/* Probe with throw-away counters: only the storage kind matters here */
			add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			;
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	/* NOTE(review): n == 0 covers a vararg call with no fixed arguments,
	 * where the loop below never reaches the sentinel — confirm */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/* 
			 * Prevent implicit arguments + the sig cookie from being passed 
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		/* byref arguments are passed as a single pointer */
		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_VALUETYPE:
			add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	/* Cookie for a vararg call where every argument is fixed (sentinel at the end) */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;
		
		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
380
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries. 
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, align, pad;
	int offset = 8;		/* first argument sits above the saved ebp and return address */
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	/* A valuetype returned on the stack adds a hidden return-buffer argument */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* arg_info [0] aggregates the implicit arguments (ret buffer + this) */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else
			size = mono_type_stack_size (csig->params [k], &align);

		/* ignore alignment for now */
		align = 1;

		/* pad up to the next ALIGN boundary (a no-op while align == 1) */
		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* round the whole frame to the architecture frame alignment; the final
	 * pad is recorded on the last entry */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
444
/*
 * Hand-assembled cdecl routine wrapping the 'cpuid' instruction: arguments
 * are the function id followed by four out-pointers receiving eax, ebx, ecx
 * and edx.  Kept as a byte blob so it can be copied into executable memory
 * at runtime instead of using inline asm (see cpuid () below for why).
 */
static const guchar cpuid_impl [] = {
	0x55,                		/* push   %ebp */
	0x89, 0xe5,                	/* mov    %esp,%ebp */
	0x53,                		/* push   %ebx */
	0x8b, 0x45, 0x08,             	/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                    	/* cpuid   */
	0x50,                		/* push   %eax */
	0x8b, 0x45, 0x10,             	/* mov    0x10(%ebp),%eax */
	0x89, 0x18,                	/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,             	/* mov    0x14(%ebp),%eax */
	0x89, 0x08,                	/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,             	/* mov    0x18(%ebp),%eax */
	0x89, 0x10,                	/* mov    %edx,(%eax) */
	0x58,                		/* pop    %eax */
	0x8b, 0x55, 0x0c,             	/* mov    0xc(%ebp),%edx */
	0x89, 0x02,                	/* mov    %eax,(%edx) */
	0x5b,                		/* pop    %ebx */
	0xc9,                		/* leave   */
	0xc3,                		/* ret     */
};

/* Signature of the code in cpuid_impl */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
467
/*
 * cpuid:
 *
 *   Execute the cpuid instruction with function ID, storing the resulting
 * eax/ebx/ecx/edx into the given out-parameters.  Returns 1 on success,
 * 0 when the processor does not support cpuid.
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
	/* cpuid support is detected by toggling bit 21 (ID) of EFLAGS: if the
	 * change sticks, the instruction is available */
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);

	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
512
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	guint16 fpcw;

	/* spec compliance requires running with double precision */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	/* NOTE(review): this re-reads the control word but the value is never
	 * used — presumably a leftover sanity check; confirm before removing */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
}
528
529 /*
530  * This function returns the optimizations supported on this cpu.
531  */
532 guint32
533 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
534 {
535         int eax, ebx, ecx, edx;
536         guint32 opts = 0;
537         
538         *exclude_mask = 0;
539         /* Feature Flags function, flags returned in EDX. */
540         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
541                 if (edx & (1 << 15)) {
542                         opts |= MONO_OPT_CMOV;
543                         if (edx & 1)
544                                 opts |= MONO_OPT_FCMOV;
545                         else
546                                 *exclude_mask |= MONO_OPT_FCMOV;
547                 } else
548                         *exclude_mask |= MONO_OPT_CMOV;
549         }
550         return opts;
551 }
552
/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	struct sigcontext *ctx = (struct sigcontext*)sigctx;
	guint8* ip;

	ip = (guint8*)ctx->SC_EIP;

	/* opcode 0xf7 with modrm reg field 7 and mod 3 is 'idiv r32', the
	 * faulting instruction we recognize here */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_ECX:
			reg = ctx->SC_ECX;
			break;
		case X86_EBX:
			reg = ctx->SC_EBX;
			break;
		default:
			/* NOTE(review): only ECX and EBX divisors are handled;
			 * presumably the JIT only emits idiv with these — confirm */
			g_assert_not_reached ();
			reg = -1;
		}

		/* A divisor of -1 means the fault was INT_MIN / -1, i.e.
		 * overflow rather than division by zero */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
587
588 static gboolean
589 is_regsize_var (MonoType *t) {
590         if (t->byref)
591                 return TRUE;
592         switch (mono_type_get_underlying_type (t)->type) {
593         case MONO_TYPE_I4:
594         case MONO_TYPE_U4:
595         case MONO_TYPE_I:
596         case MONO_TYPE_U:
597         case MONO_TYPE_PTR:
598         case MONO_TYPE_FNPTR:
599                 return TRUE;
600         case MONO_TYPE_OBJECT:
601         case MONO_TYPE_STRING:
602         case MONO_TYPE_CLASS:
603         case MONO_TYPE_SZARRAY:
604         case MONO_TYPE_ARRAY:
605                 return TRUE;
606         case MONO_TYPE_VALUETYPE:
607                 return FALSE;
608         }
609         return FALSE;
610 }
611
612 GList *
613 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
614 {
615         GList *vars = NULL;
616         int i;
617
618         for (i = 0; i < cfg->num_varinfo; i++) {
619                 MonoInst *ins = cfg->varinfo [i];
620                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
621
622                 /* unused vars */
623                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
624                         continue;
625
626                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
627                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
628                         continue;
629
630                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
631                  * 8bit quantities in caller saved registers on x86 */
632                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
633                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
634                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
635                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
636                         g_assert (i == vmv->idx);
637                         vars = g_list_prepend (vars, vmv);
638                 }
639         }
640
641         vars = mono_varlist_sort (cfg, vars, 0);
642
643         return vars;
644 }
645
646 GList *
647 mono_arch_get_global_int_regs (MonoCompile *cfg)
648 {
649         GList *regs = NULL;
650
651         /* we can use 3 registers for global allocation */
652         regs = g_list_prepend (regs, (gpointer)X86_EBX);
653         regs = g_list_prepend (regs, (gpointer)X86_ESI);
654         regs = g_list_prepend (regs, (gpointer)X86_EDI);
655
656         return regs;
657 }
658
659 /*
660  * mono_arch_regalloc_cost:
661  *
662  *  Return the cost, in number of memory references, of the action of 
663  * allocating the variable VMV into a register during global register
664  * allocation.
665  */
666 guint32
667 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
668 {
669         MonoInst *ins = cfg->varinfo [vmv->idx];
670
671         if (cfg->method->save_lmf)
672                 /* The register is already saved */
673                 return (ins->opcode == OP_ARG) ? 1 : 0;
674         else
675                 /* push+pop+possible load if it is an argument */
676                 return (ins->opcode == OP_ARG) ? 3 : 2;
677 }
678  
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *m)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset, curinst, size, align;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (m->method);
	sig = mono_method_signature (m->method);

	/* incoming arguments start above the saved ebp and return address */
	offset = 8;
	curinst = 0;

	cinfo = get_call_info (sig, FALSE);

	/* Place the return value first */
	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* hidden return-buffer argument */
		m->ret->opcode = OP_REGOFFSET;
		m->ret->inst_basereg = X86_EBP;
		m->ret->inst_offset = offset;
		offset += sizeof (gpointer);
		break;
	case ArgValuetypeInReg:
		/* handled below, after the local area is laid out */
		break;
	case ArgInIReg:
		m->ret->opcode = OP_REGVAR;
		m->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->hasthis) {
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		offset += sizeof (gpointer);
		curinst++;
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		/* the signature cookie occupies one slot among the incoming args */
		m->sig_cookie = offset;
		offset += sizeof (gpointer);
	}

	/* Remaining incoming arguments, each rounded up to a 4 byte slot */
	for (i = 0; i < sig->param_count; ++i) {
		inst = m->varinfo [curinst];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = offset;
		size = mono_type_size (sig->params [i], &align);
		size += 4 - 1;
		size &= ~(4 - 1);
		offset += size;
		curinst++;
	}

	/* From here on, offset counts downward from ebp (negated at use sites) */
	offset = 0;

	/* reserve space to save LMF and caller saved registers */

	if (m->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (m->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (m->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		m->ret->opcode = OP_REGOFFSET;
		m->ret->inst_basereg = X86_EBP;
		m->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (m, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* round the local area start up to the locals' alignment */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = m->locals_start; i < m->num_varinfo; i++) {
		/* offsets [i] == -1 means the variable did not get a stack slot */
		if (offsets [i] != -1) {
			MonoInst *inst = m->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;

	/* round the whole frame to the architecture frame alignment */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	g_free (cinfo);

	/* change sign? */
	m->stack_offset = -offset;
}
811
812 void
813 mono_arch_create_vars (MonoCompile *cfg)
814 {
815         MonoMethodSignature *sig;
816         CallInfo *cinfo;
817
818         sig = mono_method_signature (cfg->method);
819
820         cinfo = get_call_info (sig, FALSE);
821
822         if (cinfo->ret.storage == ArgValuetypeInReg)
823                 cfg->ret_var_is_local = TRUE;
824
825         g_free (cinfo);
826 }
827
828 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
829  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
830  */
831
832 /* 
833  * take the arguments and generate the arch-specific
834  * instructions to properly call the function in call.
835  * This includes pushing, moving arguments to the right register
836  * etc.
837  * Issue: who does the spilling if needed, and when?
838  */
839 MonoCallInst*
840 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
841         MonoInst *arg, *in;
842         MonoMethodSignature *sig;
843         int i, n, stack_size, type;
844         MonoType *ptype;
845         CallInfo *cinfo;
846
847         stack_size = 0;
848         /* add the vararg cookie before the non-implicit args */
849         if (call->signature->call_convention == MONO_CALL_VARARG) {
850                 MonoInst *sig_arg;
851                 /* FIXME: Add support for signature tokens to AOT */
852                 cfg->disable_aot = TRUE;
853                 MONO_INST_NEW (cfg, arg, OP_OUTARG);
854                 MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
855                 sig_arg->inst_p0 = call->signature;
856                 arg->inst_left = sig_arg;
857                 arg->type = STACK_PTR;
858                 /* prepend, so they get reversed */
859                 arg->next = call->out_args;
860                 call->out_args = arg;
861                 stack_size += sizeof (gpointer);
862         }
863         sig = call->signature;
864         n = sig->param_count + sig->hasthis;
865
866         cinfo = get_call_info (sig, FALSE);
867
868         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
869                 if (cinfo->ret.storage == ArgOnStack)
870                         stack_size += sizeof (gpointer);
871         }
872
873         for (i = 0; i < n; ++i) {
874                 if (is_virtual && i == 0) {
875                         /* the argument will be attached to the call instrucion */
876                         in = call->args [i];
877                         stack_size += 4;
878                 } else {
879                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
880                         in = call->args [i];
881                         arg->cil_code = in->cil_code;
882                         arg->inst_left = in;
883                         arg->type = in->type;
884                         /* prepend, so they get reversed */
885                         arg->next = call->out_args;
886                         call->out_args = arg;
887                         if (i >= sig->hasthis) {
888                                 MonoType *t = sig->params [i - sig->hasthis];
889                                 ptype = mono_type_get_underlying_type (t);
890                                 if (t->byref)
891                                         type = MONO_TYPE_U;
892                                 else
893                                         type = ptype->type;
894                                 /* FIXME: validate arguments... */
895                                 switch (type) {
896                                 case MONO_TYPE_I:
897                                 case MONO_TYPE_U:
898                                 case MONO_TYPE_BOOLEAN:
899                                 case MONO_TYPE_CHAR:
900                                 case MONO_TYPE_I1:
901                                 case MONO_TYPE_U1:
902                                 case MONO_TYPE_I2:
903                                 case MONO_TYPE_U2:
904                                 case MONO_TYPE_I4:
905                                 case MONO_TYPE_U4:
906                                 case MONO_TYPE_STRING:
907                                 case MONO_TYPE_CLASS:
908                                 case MONO_TYPE_OBJECT:
909                                 case MONO_TYPE_PTR:
910                                 case MONO_TYPE_FNPTR:
911                                 case MONO_TYPE_ARRAY:
912                                 case MONO_TYPE_SZARRAY:
913                                         stack_size += 4;
914                                         break;
915                                 case MONO_TYPE_I8:
916                                 case MONO_TYPE_U8:
917                                         stack_size += 8;
918                                         break;
919                                 case MONO_TYPE_R4:
920                                         stack_size += 4;
921                                         arg->opcode = OP_OUTARG_R4;
922                                         break;
923                                 case MONO_TYPE_R8:
924                                         stack_size += 8;
925                                         arg->opcode = OP_OUTARG_R8;
926                                         break;
927                                 case MONO_TYPE_VALUETYPE: {
928                                         int size;
929                                         if (sig->pinvoke) 
930                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
931                                         else 
932                                                 size = mono_type_stack_size (&in->klass->byval_arg, NULL);
933
934                                         stack_size += size;
935                                         arg->opcode = OP_OUTARG_VT;
936                                         arg->klass = in->klass;
937                                         arg->unused = sig->pinvoke;
938                                         arg->inst_imm = size; 
939                                         break;
940                                 }
941                                 case MONO_TYPE_TYPEDBYREF:
942                                         stack_size += sizeof (MonoTypedRef);
943                                         arg->opcode = OP_OUTARG_VT;
944                                         arg->klass = in->klass;
945                                         arg->unused = sig->pinvoke;
946                                         arg->inst_imm = sizeof (MonoTypedRef); 
947                                         break;
948                                 default:
949                                         g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
950                                 }
951                         } else {
952                                 /* the this argument */
953                                 stack_size += 4;
954                         }
955                 }
956         }
957
958         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
959                 if (cinfo->ret.storage == ArgValuetypeInReg) {
960                         MonoInst *zero_inst;
961                         /*
962                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
963                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
964                          * before calling the function. So we add a dummy instruction to represent pushing the 
965                          * struct return address to the stack. The return address will be saved to this stack slot 
966                          * by the code emitted in this_vret_args.
967                          */
968                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
969                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
970                         zero_inst->inst_p0 = 0;
971                         arg->inst_left = zero_inst;
972                         arg->type = STACK_PTR;
973                         /* prepend, so they get reversed */
974                         arg->next = call->out_args;
975                         call->out_args = arg;
976                 }
977                 else
978                         /* if the function returns a struct, the called method already does a ret $0x4 */
979                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
980                                 stack_size -= 4;
981         }
982
983         call->stack_usage = stack_size;
984         g_free (cinfo);
985
986         /* 
987          * should set more info in call, such as the stack space
988          * used by the args that needs to be added back to esp
989          */
990
991         return call;
992 }
993
994 /*
995  * Allow tracing to work with this interface (with an optional argument)
996  */
997 void*
998 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
999 {
1000         guchar *code = p;
1001
1002         /* if some args are passed in registers, we need to save them here */
1003         x86_push_reg (code, X86_EBP);
1004
1005         if (cfg->compile_aot) {
1006                 x86_push_imm (code, cfg->method);
1007                 x86_mov_reg_imm (code, X86_EAX, func);
1008                 x86_call_reg (code, X86_EAX);
1009         } else {
1010                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1011                 x86_push_imm (code, cfg->method);
1012                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1013                 x86_call_code (code, 0);
1014         }
1015         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1016
1017         return code;
1018 }
1019
/* What part of the return value mono_arch_instrument_epilog has to
 * preserve around its call into the tracing function. */
enum {
	SAVE_NONE,     /* void return: nothing to preserve */
	SAVE_STRUCT,   /* valuetype returned through a hidden pointer argument */
	SAVE_EAX,      /* 32-bit integer/pointer result in EAX */
	SAVE_EAX_EDX,  /* 64-bit result in the EDX:EAX pair */
	SAVE_FP        /* floating point result on the x87 stack */
};
1027
1028 void*
1029 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1030 {
1031         guchar *code = p;
1032         int arg_size = 0, save_mode = SAVE_NONE;
1033         MonoMethod *method = cfg->method;
1034         
1035         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1036         case MONO_TYPE_VOID:
1037                 /* special case string .ctor icall */
1038                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1039                         save_mode = SAVE_EAX;
1040                 else
1041                         save_mode = SAVE_NONE;
1042                 break;
1043         case MONO_TYPE_I8:
1044         case MONO_TYPE_U8:
1045                 save_mode = SAVE_EAX_EDX;
1046                 break;
1047         case MONO_TYPE_R4:
1048         case MONO_TYPE_R8:
1049                 save_mode = SAVE_FP;
1050                 break;
1051         case MONO_TYPE_VALUETYPE:
1052                 save_mode = SAVE_STRUCT;
1053                 break;
1054         default:
1055                 save_mode = SAVE_EAX;
1056                 break;
1057         }
1058
1059         switch (save_mode) {
1060         case SAVE_EAX_EDX:
1061                 x86_push_reg (code, X86_EDX);
1062                 x86_push_reg (code, X86_EAX);
1063                 if (enable_arguments) {
1064                         x86_push_reg (code, X86_EDX);
1065                         x86_push_reg (code, X86_EAX);
1066                         arg_size = 8;
1067                 }
1068                 break;
1069         case SAVE_EAX:
1070                 x86_push_reg (code, X86_EAX);
1071                 if (enable_arguments) {
1072                         x86_push_reg (code, X86_EAX);
1073                         arg_size = 4;
1074                 }
1075                 break;
1076         case SAVE_FP:
1077                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1078                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1079                 if (enable_arguments) {
1080                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1081                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1082                         arg_size = 8;
1083                 }
1084                 break;
1085         case SAVE_STRUCT:
1086                 if (enable_arguments) {
1087                         x86_push_membase (code, X86_EBP, 8);
1088                         arg_size = 4;
1089                 }
1090                 break;
1091         case SAVE_NONE:
1092         default:
1093                 break;
1094         }
1095
1096         if (cfg->compile_aot) {
1097                 x86_push_imm (code, method);
1098                 x86_mov_reg_imm (code, X86_EAX, func);
1099                 x86_call_reg (code, X86_EAX);
1100         } else {
1101                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1102                 x86_push_imm (code, method);
1103                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1104                 x86_call_code (code, 0);
1105         }
1106         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1107
1108         switch (save_mode) {
1109         case SAVE_EAX_EDX:
1110                 x86_pop_reg (code, X86_EAX);
1111                 x86_pop_reg (code, X86_EDX);
1112                 break;
1113         case SAVE_EAX:
1114                 x86_pop_reg (code, X86_EAX);
1115                 break;
1116         case SAVE_FP:
1117                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1118                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1119                 break;
1120         case SAVE_NONE:
1121         default:
1122                 break;
1123         }
1124
1125         return code;
1126 }
1127
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch for INS. If the target (a label when
 * MONO_INST_BRLABEL is set, otherwise inst_true_bb) has already been
 * emitted, branch to its known native address; otherwise record a patch
 * and emit a branch with a 0 displacement, choosing the short (imm8)
 * form when MONO_OPT_BRANCH is enabled and the estimated distance fits.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1152
/* Emit a forward branch, taken when COND holds, to a not-yet-emitted
 * throw site for the named system exception; the target is recorded as
 * a MONO_PATCH_INFO_EXC patch and filled in later.
 * NOTE(review): the trailing ';' after "while (0)" defeats the usual
 * do/while(0) idiom; removing it could break call sites that omit their
 * own semicolon, so it is left as-is — audit call sites before fixing. */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
                x86_branch32 (code, cond, 0, signed);               \
        } while (0); 
1160
/* Compare and pop the two top x87 stack entries (fcompp), then copy the
 * FPU status word into AX (fnstsw) so the result can be tested with
 * integer instructions.
 * NOTE(review): trailing ';' after "while (0)" defeats the do/while(0)
 * idiom; left unchanged pending an audit of call sites. */
#define EMIT_FPCOMPARE(code) do { \
        x86_fcompp (code); \
        x86_fnstsw (code); \
} while (0); 
1165
1166
1167 static guint8*
1168 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1169 {
1170         if (cfg->compile_aot) {
1171                 guint32 got_reg = X86_EAX;
1172
1173                 if (cfg->compile_aot) {          
1174                         /*
1175                          * Since the patches are generated by the back end, there is
1176                          * no way to generate a got_var at this point.
1177                          */
1178                         g_assert (cfg->got_var);
1179
1180                         if (cfg->got_var->opcode == OP_REGOFFSET)
1181                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1182                         else
1183                                 got_reg = cfg->got_var->dreg;
1184                 }
1185
1186                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1187                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1188         }
1189         else {
1190                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1191                 x86_call_code (code, 0);
1192         }
1193
1194         return code;
1195 }
1196
/* FIXME: Add more instructions */
/* True when INS does not read the x86 condition flags, so the previous
 * instruction may freely clobber them (used by the ICONST -> XOR rule
 * in peephole_pass, since XOR sets the flags). */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1199
/*
 * peephole_pass:
 *
 *   Run simple pattern-based rewrites over the instruction list of BB.
 * Each rule inspects the current instruction and, for two-instruction
 * patterns, the previous one (last_ins); rules either rewrite the current
 * instruction in place or unlink it from the singly linked list.
 * bb->last_ins is updated to the final instruction as a side effect.
 */
static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *last_ins = NULL;
	ins = bb->code;

	while (ins) {

		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we cant do it always */
			if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
				ins->opcode = CEE_XOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
			}
			break;
		case OP_MUL_IMM: 
			/* remove unnecessary multiplication with 1 */
			if (ins->inst_imm == 1) {
				if (ins->dreg != ins->sreg1) {
					ins->opcode = OP_MOVE;
				} else {
					/* NOTE(review): no NULL guard on last_ins here,
					 * unlike the OP_MOVE rule below — presumably
					 * OP_MUL_IMM never starts a block; confirm. */
					last_ins->next = ins->next;
					ins = ins->next;
					continue;
				}
			}
			break;
		case OP_COMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0) 
			 * --> 
			 * OP_X86_TEST_NULL (reg) 
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/* 
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
					ins->opcode = OP_COMPARE_IMM;
					ins->sreg1 = last_ins->sreg1;

					/* check if we can remove cmp reg,0 with test null */
					if (!ins->inst_imm)
						ins->opcode = OP_X86_TEST_NULL;
				}

			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI4_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
					 || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;                             
					ins = ins->next;                                
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}

			/* 
			 * Note: reg1 must be different from the basereg in the second load
			 * Note: if reg1 = reg2 is equal then second load is removed
			 *
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_LOAD_MEMBASE offset(basereg), reg1
			 * OP_MOVE reg1, reg2
			 *
			 * NOTE(review): the following is a plain `if`, not `else if`,
			 * so it is also evaluated when the store-forwarding branch
			 * above ran — presumably intentional (the branches' guards
			 * are mutually exclusive in practice); confirm.
			 */
			} if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
					   || last_ins->opcode == OP_LOAD_MEMBASE) &&
			      ins->inst_basereg != last_ins->dreg &&
			      ins->inst_basereg == last_ins->inst_basereg &&
			      ins->inst_offset == last_ins->inst_offset) {

				if (ins->dreg == last_ins->dreg) {
					last_ins->next = ins->next;                             
					ins = ins->next;                                
					continue;
				} else {
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->dreg;
				}

				//g_assert_not_reached ();

#if 0
			/* 
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg
			 * -->
			 * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
			 * OP_ICONST reg, imm
			 */
			} else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
						|| last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
				   ins->inst_basereg == last_ins->inst_destbasereg &&
				   ins->inst_offset == last_ins->inst_offset) {
				//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
				ins->opcode = OP_ICONST;
				ins->inst_c0 = last_ins->inst_imm;
				g_assert_not_reached (); // check this rule
#endif
			}
			break;
		case OP_LOADU1_MEMBASE:
		case OP_LOADI1_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;                             
					ins = ins->next;                                
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}
			}
			break;
		case OP_LOADU2_MEMBASE:
		case OP_LOADI2_MEMBASE:
			/* 
			 * Note: if reg1 = reg2 the load op is removed
			 *
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
			 * OP_LOAD_MEMBASE offset(basereg), reg2
			 * -->
			 * OP_STORE_MEMBASE_REG reg1, offset(basereg)
			 * OP_MOVE reg1, reg2
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
					ins->inst_basereg == last_ins->inst_destbasereg &&
					ins->inst_offset == last_ins->inst_offset) {
				if (ins->dreg == last_ins->sreg1) {
					last_ins->next = ins->next;                             
					ins = ins->next;                                
					continue;
				} else {
					//static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
					ins->opcode = OP_MOVE;
					ins->sreg1 = last_ins->sreg1;
				}
			}
			break;
		case CEE_CONV_I4:
		case CEE_CONV_U4:
		case OP_MOVE:
			/*
			 * Removes:
			 *
			 * OP_MOVE reg, reg 
			 */
			if (ins->dreg == ins->sreg1) {
				if (last_ins)
					last_ins->next = ins->next;                             
				ins = ins->next;
				continue;
			}
			/* 
			 * Removes:
			 *
			 * OP_MOVE sreg, dreg 
			 * OP_MOVE dreg, sreg
			 */
			if (last_ins && last_ins->opcode == OP_MOVE &&
			    ins->sreg1 == last_ins->dreg &&
			    ins->dreg == last_ins->sreg1) {
				last_ins->next = ins->next;                             
				ins = ins->next;                                
				continue;
			}
			break;
			
		case OP_X86_PUSH_MEMBASE:
			/* push of a just-stored slot -> push the register directly */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
					 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				    ins->opcode = OP_X86_PUSH;
				    ins->sreg1 = last_ins->sreg1;
			}
			break;
		}
		last_ins = ins;
		ins = ins->next;
	}
	bb->last_ins = last_ins;
}
1425
/* x86 condition codes indexed by relative branch/compare opcode.
 * NOTE(review): the row layout (first row vs second row presumably
 * signed vs unsigned compares, last entries overflow/carry) is inferred
 * from the opcode tables used by callers outside this chunk — confirm. */
static const int 
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
1432
/* Register-allocator debug output, enabled at verbose level > 1;
 * swap in the empty definition below to silence it entirely. */
#define DEBUG(a) if (cfg->verbose_level > 1) a
//#define DEBUG(a)
1435
1436 /*
1437  * returns the offset used by spillvar. It allocates a new
1438  * spill variable if necessary. 
1439  */
1440 static int
1441 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1442 {
1443         MonoSpillInfo **si, *info;
1444         int i = 0;
1445
1446         si = &cfg->spill_info; 
1447         
1448         while (i <= spillvar) {
1449
1450                 if (!*si) {
1451                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1452                         info->next = NULL;
1453                         cfg->stack_offset -= sizeof (gpointer);
1454                         info->offset = cfg->stack_offset;
1455                 }
1456
1457                 if (i == spillvar)
1458                         return (*si)->offset;
1459
1460                 i++;
1461                 si = &(*si)->next;
1462         }
1463
1464         g_assert_not_reached ();
1465         return 0;
1466 }
1467
1468 /*
1469  * returns the offset used by spillvar. It allocates a new
1470  * spill float variable if necessary. 
1471  * (same as mono_spillvar_offset but for float)
1472  */
1473 static int
1474 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1475 {
1476         MonoSpillInfo **si, *info;
1477         int i = 0;
1478
1479         si = &cfg->spill_info_float; 
1480         
1481         while (i <= spillvar) {
1482
1483                 if (!*si) {
1484                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1485                         info->next = NULL;
1486                         cfg->stack_offset -= sizeof (double);
1487                         info->offset = cfg->stack_offset;
1488                 }
1489
1490                 if (i == spillvar)
1491                         return (*si)->offset;
1492
1493                 i++;
1494                 si = &(*si)->next;
1495         }
1496
1497         g_assert_not_reached ();
1498         return 0;
1499 }
1500
1501 /*
1502  * Creates a store for spilled floating point items
1503  */
1504 static MonoInst*
1505 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1506 {
1507         MonoInst *store;
1508         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1509         store->sreg1 = reg;
1510         store->inst_destbasereg = X86_EBP;
1511         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1512
1513         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08x(%%sp)) (from %d)\n", spill, store->inst_offset, reg));
1514         return store;
1515 }
1516
1517 /*
1518  * Creates a load for spilled floating point items 
1519  */
1520 static MonoInst*
1521 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1522 {
1523         MonoInst *load;
1524         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1525         load->dreg = reg;
1526         load->inst_basereg = X86_EBP;
1527         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1528
1529         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08x(%%sp)) (from %d)\n", spill, load->inst_offset, reg));
1530         return load;
1531 }
1532
/* Classify a hard register: callee-saved regs are "global" (survive calls,
 * not handed out by the local allocator); caller-saved regs are "freeable". */
#define is_global_ireg(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && !X86_IS_CALLEE ((r)))
#define reg_is_freeable(r) ((r) >= 0 && (r) < MONO_MAX_IREGS && X86_IS_CALLEE ((r)))
1535
/* Per-register liveness record filled in by the forward scan of
 * mono_arch_local_regalloc; indices are instruction numbers (1-based). */
typedef struct {
	int born_in;	/* instruction where the reg is first defined (0 = never born) */
	int killed_in;	/* instruction where the reg's value is overwritten */
	int last_use;	/* most recent instruction that read the reg */
	int prev_use;	/* the read before last_use */
	int flags;		/* used to track fp spill/load */
} RegTrack;
1543
/* Per-opcode operand descriptors from the pentium machine description. */
static const char*const * ins_spec = pentium_desc;
1545
1546 static void
1547 print_ins (int i, MonoInst *ins)
1548 {
1549         const char *spec = ins_spec [ins->opcode];
1550         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1551         if (spec [MONO_INST_DEST]) {
1552                 if (ins->dreg >= MONO_MAX_IREGS)
1553                         g_print (" R%d <-", ins->dreg);
1554                 else
1555                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1556         }
1557         if (spec [MONO_INST_SRC1]) {
1558                 if (ins->sreg1 >= MONO_MAX_IREGS)
1559                         g_print (" R%d", ins->sreg1);
1560                 else
1561                         g_print (" %s", mono_arch_regname (ins->sreg1));
1562         }
1563         if (spec [MONO_INST_SRC2]) {
1564                 if (ins->sreg2 >= MONO_MAX_IREGS)
1565                         g_print (" R%d", ins->sreg2);
1566                 else
1567                         g_print (" %s", mono_arch_regname (ins->sreg2));
1568         }
1569         if (spec [MONO_INST_CLOB])
1570                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1571         g_print ("\n");
1572 }
1573
1574 static void
1575 print_regtrack (RegTrack *t, int num)
1576 {
1577         int i;
1578         char buf [32];
1579         const char *r;
1580         
1581         for (i = 0; i < num; ++i) {
1582                 if (!t [i].born_in)
1583                         continue;
1584                 if (i >= MONO_MAX_IREGS) {
1585                         g_snprintf (buf, sizeof(buf), "R%d", i);
1586                         r = buf;
1587                 } else
1588                         r = mono_arch_regname (i);
1589                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1590         }
1591 }
1592
typedef struct InstList InstList;

/* Doubly linked list node used to hold the reversed instruction stream
 * that the backwards allocation pass walks. */
struct InstList {
	InstList *prev;
	InstList *next;
	MonoInst *data;	/* the wrapped instruction */
};
1600
1601 static inline InstList*
1602 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1603 {
1604         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1605         item->data = data;
1606         item->prev = NULL;
1607         item->next = list;
1608         if (list)
1609                 list->prev = item;
1610         return item;
1611 }
1612
1613 /*
1614  * Force the spilling of the variable in the symbolic register 'reg'.
1615  */
1616 static int
1617 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1618 {
1619         MonoInst *load;
1620         int i, sel, spill;
1621         
1622         sel = cfg->rs->iassign [reg];
1623         /*i = cfg->rs->isymbolic [sel];
1624         g_assert (i == reg);*/
1625         i = reg;
1626         spill = ++cfg->spill_count;
1627         cfg->rs->iassign [i] = -spill - 1;
1628         mono_regstate_free_int (cfg->rs, sel);
1629         /* we need to create a spill var and insert a load to sel after the current instruction */
1630         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1631         load->dreg = sel;
1632         load->inst_basereg = X86_EBP;
1633         load->inst_offset = mono_spillvar_offset (cfg, spill);
1634         if (item->prev) {
1635                 while (ins->next != item->prev->data)
1636                         ins = ins->next;
1637         }
1638         load->next = ins->next;
1639         ins->next = load;
1640         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1641         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1642         g_assert (i == sel);
1643
1644         return sel;
1645 }
1646
/*
 * Picks a hard register from 'regmask' for symbolic register 'reg' by
 * spilling whichever symbolic register currently occupies it.  Registers
 * used by the current instruction's sreg1/sreg2/dreg are excluded first so
 * we never evict an operand of 'ins' itself.  A reload of the evicted value
 * is linked in after 'ins' (which runs before the eviction point, since the
 * allocator walks backwards).  Returns the selected hard register.
 */
static int
get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
{
	MonoInst *load;
	int i, sel, spill;

	DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
	/* exclude the registers in the current instruction */
	if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
		if (ins->sreg1 >= MONO_MAX_IREGS)
			regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
		else
			regmask &= ~ (1 << ins->sreg1);
		DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
	}
	if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
		if (ins->sreg2 >= MONO_MAX_IREGS)
			regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
		else
			regmask &= ~ (1 << ins->sreg2);
		DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
	}
	if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
		regmask &= ~ (1 << ins->dreg);
		DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
	}

	DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
	g_assert (regmask); /* need at least a register we can free */
	sel = -1;
	/* we should track prev_use and spill the register that's farther */
	for (i = 0; i < MONO_MAX_IREGS; ++i) {
		if (regmask & (1 << i)) {
			sel = i;
			DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
			break;
		}
	}
	/* evict the symbolic register currently living in 'sel' */
	i = cfg->rs->isymbolic [sel];
	spill = ++cfg->spill_count;
	cfg->rs->iassign [i] = -spill - 1;
	mono_regstate_free_int (cfg->rs, sel);
	/* we need to create a spill var and insert a load to sel after the current instruction */
	MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
	load->dreg = sel;
	load->inst_basereg = X86_EBP;
	load->inst_offset = mono_spillvar_offset (cfg, spill);
	if (item->prev) {
		/* item->prev wraps the next instruction in program order: advance
		 * 'ins' so the load is inserted just before it */
		while (ins->next != item->prev->data)
			ins = ins->next;
	}
	load->next = ins->next;
	ins->next = load;
	DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
	/* grab the freed register back for the caller */
	i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
	g_assert (i == sel);
	
	return sel;
}
1706
1707 static MonoInst*
1708 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1709 {
1710         MonoInst *copy;
1711         MONO_INST_NEW (cfg, copy, OP_MOVE);
1712         copy->dreg = dest;
1713         copy->sreg1 = src;
1714         if (ins) {
1715                 copy->next = ins->next;
1716                 ins->next = copy;
1717         }
1718         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1719         return copy;
1720 }
1721
1722 static MonoInst*
1723 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1724 {
1725         MonoInst *store;
1726         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1727         store->sreg1 = reg;
1728         store->inst_destbasereg = X86_EBP;
1729         store->inst_offset = mono_spillvar_offset (cfg, spill);
1730         if (ins) {
1731                 store->next = ins->next;
1732                 ins->next = store;
1733         }
1734         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1735         return store;
1736 }
1737
/*
 * Inserts 'to_insert' immediately before 'ins' in the forward instruction
 * chain.  'item' is the node wrapping 'ins' in the reversed InstList, so
 * item->next (if present) wraps an earlier instruction in program order;
 * we walk forward from it to find the link that points at 'ins'.
 */
static void
insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
{
	MonoInst *prev;
	if (item->next) {
		prev = item->next->data;

		/* find the instruction whose ->next is 'ins' and splice in there */
		while (prev->next != ins)
			prev = prev->next;
		to_insert->next = ins;
		prev->next = to_insert;
	} else {
		/* 'ins' is the first instruction: nothing links to it yet */
		to_insert->next = ins;
	}
	/* 
	 * needed otherwise in the next instruction we can add an ins to the 
	 * end and that would get past this instruction.
	 */
	item->data = to_insert; 
}
1758
1759
/* NOTE(review): dead code, compiled out — kept for reference only. */
#if  0
static int
alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
{
	int val = cfg->rs->iassign [sym_reg];
	if (val < 0) {
		int spill = 0;
		if (val < -1) {
			/* the register gets spilled after this inst */
			spill = -val -1;
		}
		val = mono_regstate_alloc_int (cfg->rs, allow_mask);
		if (val < 0)
			val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
		cfg->rs->iassign [sym_reg] = val;
		/* add option to store before the instruction for src registers */
		if (spill)
			create_spilled_store (cfg, spill, val, sym_reg, ins);
	}
	cfg->rs->isymbolic [val] = sym_reg;
	return val;
}
#endif
1783
/* flags used in reginfo->flags */
enum {
	MONO_X86_FP_NEEDS_LOAD_SPILL	= 1 << 0,	/* fp src must be reloaded, then spilled again */
	MONO_X86_FP_NEEDS_SPILL			= 1 << 1,	/* fp stack full: dest value must be spilled */
	MONO_X86_FP_NEEDS_LOAD			= 1 << 2,	/* fp src must be reloaded from its spill slot */
	MONO_X86_REG_NOT_ECX			= 1 << 3,	/* shift sreg1: avoid ECX (sreg2 needs it) */
	MONO_X86_REG_EAX				= 1 << 4,	/* long low word: prefer EAX */
	MONO_X86_REG_EDX				= 1 << 5,	/* long high word: prefer EDX */
	MONO_X86_REG_ECX				= 1 << 6	/* shift count: prefer ECX */
};
1794
1795 static int
1796 mono_x86_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1797 {
1798         int val;
1799         int test_mask = dest_mask;
1800
1801         if (flags & MONO_X86_REG_EAX)
1802                 test_mask &= (1 << X86_EAX);
1803         else if (flags & MONO_X86_REG_EDX)
1804                 test_mask &= (1 << X86_EDX);
1805         else if (flags & MONO_X86_REG_ECX)
1806                 test_mask &= (1 << X86_ECX);
1807         else if (flags & MONO_X86_REG_NOT_ECX)
1808                 test_mask &= ~ (1 << X86_ECX);
1809
1810         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1811         if (val >= 0 && test_mask != dest_mask)
1812                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1813
1814         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1815                 DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but exluding ECX\n", test_mask));
1816                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & (~1 << X86_ECX)));
1817         }
1818
1819         if (val < 0) {
1820                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1821                 if (val < 0)
1822                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1823         }
1824
1825         return val;
1826 }
1827
1828 static inline void
1829 assign_ireg (MonoRegState *rs, int reg, int hreg)
1830 {
1831         g_assert (reg >= MONO_MAX_IREGS);
1832         g_assert (hreg < MONO_MAX_IREGS);
1833         g_assert (! is_global_ireg (hreg));
1834
1835         rs->iassign [reg] = hreg;
1836         rs->isymbolic [hreg] = reg;
1837         rs->ifree_mask &= ~ (1 << hreg);
1838 }
1839
1840 /*#include "cprop.c"*/
1841
1842 /*
1843  * Local register allocation.
1844  * We first scan the list of instructions and we save the liveness info of
1845  * each register (when the register is first used, when it's value is set etc.).
1846  * We also reverse the list of instructions (in the InstList list) because assigning
1847  * registers backwards allows for more tricks to be used.
1848  */
1849 void
1850 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1851 {
1852         MonoInst *ins;
1853         MonoRegState *rs = cfg->rs;
1854         int i, val, fpcount;
1855         RegTrack *reginfo, *reginfof;
1856         RegTrack *reginfo1, *reginfo2, *reginfod;
1857         InstList *tmp, *reversed = NULL;
1858         const char *spec;
1859         guint32 src1_mask, src2_mask, dest_mask;
1860         GList *fspill_list = NULL;
1861         int fspill = 0;
1862
1863         if (!bb->code)
1864                 return;
1865         rs->next_vireg = bb->max_ireg;
1866         rs->next_vfreg = bb->max_freg;
1867         mono_regstate_assign (rs);
1868         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1869         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1870         rs->ifree_mask = X86_CALLEE_REGS;
1871
1872         ins = bb->code;
1873
1874         /*if (cfg->opt & MONO_OPT_COPYPROP)
1875                 local_copy_prop (cfg, ins);*/
1876
1877         i = 1;
1878         fpcount = 0;
1879         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1880         /* forward pass on the instructions to collect register liveness info */
1881         while (ins) {
1882                 spec = ins_spec [ins->opcode];
1883                 
1884                 DEBUG (print_ins (i, ins));
1885
1886                 if (spec [MONO_INST_SRC1]) {
1887                         if (spec [MONO_INST_SRC1] == 'f') {
1888                                 GList *spill;
1889                                 reginfo1 = reginfof;
1890
1891                                 spill = g_list_first (fspill_list);
1892                                 if (spill && fpcount < MONO_MAX_FREGS) {
1893                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1894                                         fspill_list = g_list_remove (fspill_list, spill->data);
1895                                 } else
1896                                         fpcount--;
1897                         }
1898                         else
1899                                 reginfo1 = reginfo;
1900                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1901                         reginfo1 [ins->sreg1].last_use = i;
1902                         if (spec [MONO_INST_SRC1] == 'L') {
1903                                 /* The virtual register is allocated sequentially */
1904                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1905                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1906                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1907                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1908
1909                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1910                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1911                         }
1912                 } else {
1913                         ins->sreg1 = -1;
1914                 }
1915                 if (spec [MONO_INST_SRC2]) {
1916                         if (spec [MONO_INST_SRC2] == 'f') {
1917                                 GList *spill;
1918                                 reginfo2 = reginfof;
1919                                 spill = g_list_first (fspill_list);
1920                                 if (spill) {
1921                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1922                                         fspill_list = g_list_remove (fspill_list, spill->data);
1923                                         if (fpcount >= MONO_MAX_FREGS) {
1924                                                 fspill++;
1925                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1926                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1927                                         }
1928                                 } else
1929                                         fpcount--;
1930                         }
1931                         else
1932                                 reginfo2 = reginfo;
1933                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1934                         reginfo2 [ins->sreg2].last_use = i;
1935                         if (spec [MONO_INST_SRC2] == 'L') {
1936                                 /* The virtual register is allocated sequentially */
1937                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1938                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1939                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1940                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1941                         }
1942                         if (spec [MONO_INST_CLOB] == 's') {
1943                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1944                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1945                         }
1946                 } else {
1947                         ins->sreg2 = -1;
1948                 }
1949                 if (spec [MONO_INST_DEST]) {
1950                         if (spec [MONO_INST_DEST] == 'f') {
1951                                 reginfod = reginfof;
1952                                 if (fpcount >= MONO_MAX_FREGS) {
1953                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1954                                         fspill++;
1955                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1956                                         fpcount--;
1957                                 }
1958                                 fpcount++;
1959                         }
1960                         else
1961                                 reginfod = reginfo;
1962                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1963                                 reginfod [ins->dreg].killed_in = i;
1964                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1965                         reginfod [ins->dreg].last_use = i;
1966                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1967                                 reginfod [ins->dreg].born_in = i;
1968                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
1969                                 /* The virtual register is allocated sequentially */
1970                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1971                                 reginfod [ins->dreg + 1].last_use = i;
1972                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1973                                         reginfod [ins->dreg + 1].born_in = i;
1974
1975                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
1976                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
1977                         }
1978                 } else {
1979                         ins->dreg = -1;
1980                 }
1981
1982                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
1983                 ++i;
1984                 ins = ins->next;
1985         }
1986
1987         // todo: check if we have anything left on fp stack, in verify mode?
1988         fspill = 0;
1989
1990         DEBUG (print_regtrack (reginfo, rs->next_vireg));
1991         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
1992         tmp = reversed;
1993         while (tmp) {
1994                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
1995                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
1996                 --i;
1997                 ins = tmp->data;
1998                 spec = ins_spec [ins->opcode];
1999                 prev_dreg = -1;
2000                 clob_dreg = -1;
2001                 DEBUG (g_print ("processing:"));
2002                 DEBUG (print_ins (i, ins));
2003                 if (spec [MONO_INST_CLOB] == 's') {
2004                         /*
2005                          * Shift opcodes, SREG2 must be RCX
2006                          */
2007                         if (rs->ifree_mask & (1 << X86_ECX)) {
2008                                 if (ins->sreg2 < MONO_MAX_IREGS) {
2009                                         /* Argument already in hard reg, need to copy */
2010                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2011                                         insert_before_ins (ins, tmp, copy);
2012                                 }
2013                                 else {
2014                                         DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
2015                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2016                                 }
2017                         } else {
2018                                 int need_ecx_spill = TRUE;
2019                                 /* 
2020                                  * we first check if src1/dreg is already assigned a register
2021                                  * and then we force a spill of the var assigned to ECX.
2022                                  */
2023                                 /* the destination register can't be ECX */
2024                                 dest_mask &= ~ (1 << X86_ECX);
2025                                 src1_mask &= ~ (1 << X86_ECX);
2026                                 val = rs->iassign [ins->dreg];
2027                                 /* 
2028                                  * the destination register is already assigned to ECX:
2029                                  * we need to allocate another register for it and then
2030                                  * copy from this to ECX.
2031                                  */
2032                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
2033                                         int new_dest;
2034                                         new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2035                                         g_assert (new_dest >= 0);
2036                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
2037
2038                                         rs->isymbolic [new_dest] = ins->dreg;
2039                                         rs->iassign [ins->dreg] = new_dest;
2040                                         clob_dreg = ins->dreg;
2041                                         ins->dreg = new_dest;
2042                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
2043                                         need_ecx_spill = FALSE;
2044                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
2045                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
2046                                         rs->iassign [ins->dreg] = val;
2047                                         rs->isymbolic [val] = prev_dreg;
2048                                         ins->dreg = val;*/
2049                                 }
2050                                 if (is_global_ireg (ins->sreg2)) {
2051                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
2052                                         insert_before_ins (ins, tmp, copy);
2053                                 }
2054                                 else {
2055                                         val = rs->iassign [ins->sreg2];
2056                                         if (val >= 0 && val != X86_ECX) {
2057                                                 MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
2058                                                 DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
2059                                                 move->next = ins;
2060                                                 g_assert_not_reached ();
2061                                                 /* FIXME: where is move connected to the instruction list? */
2062                                                 //tmp->prev->data->next = move;
2063                                         }
2064                                         else {
2065                                                 if (val == X86_ECX)
2066                                                 need_ecx_spill = FALSE;
2067                                         }
2068                                 }
2069                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
2070                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
2071                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
2072                                         mono_regstate_free_int (rs, X86_ECX);
2073                                 }
2074                                 if (!is_global_ireg (ins->sreg2))
2075                                         /* force-set sreg2 */
2076                                         assign_ireg (rs, ins->sreg2, X86_ECX);
2077                         }
2078                         ins->sreg2 = X86_ECX;
2079                 } else if (spec [MONO_INST_CLOB] == 'd') {
2080                         /*
2081                          * DIVISION/REMAINER
2082                          */
2083                         int dest_reg = X86_EAX;
2084                         int clob_reg = X86_EDX;
2085                         if (spec [MONO_INST_DEST] == 'd') {
2086                                 dest_reg = X86_EDX; /* reminder */
2087                                 clob_reg = X86_EAX;
2088                         }
2089                         if (is_global_ireg (ins->dreg))
2090                                 val = ins->dreg;
2091                         else
2092                                 val = rs->iassign [ins->dreg];
2093                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
2094                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2095                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2096                                 mono_regstate_free_int (rs, dest_reg);
2097                         }
2098                         if (val < 0) {
2099                                 if (val < -1) {
2100                                         /* the register gets spilled after this inst */
2101                                         int spill = -val -1;
2102                                         dest_mask = 1 << dest_reg;
2103                                         prev_dreg = ins->dreg;
2104                                         val = mono_regstate_alloc_int (rs, dest_mask);
2105                                         if (val < 0)
2106                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
2107                                         rs->iassign [ins->dreg] = val;
2108                                         if (spill)
2109                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
2110                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2111                                         rs->isymbolic [val] = prev_dreg;
2112                                         ins->dreg = val;
2113                                 } else {
2114                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
2115                                         prev_dreg = ins->dreg;
2116                                         assign_ireg (rs, ins->dreg, dest_reg);
2117                                         ins->dreg = dest_reg;
2118                                         val = dest_reg;
2119                                 }
2120                         }
2121
2122                         //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
2123                         if (val != dest_reg) { /* force a copy */
2124                                 create_copy_ins (cfg, val, dest_reg, ins);
2125                                 if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
2126                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
2127                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
2128                                         mono_regstate_free_int (rs, dest_reg);
2129                                 }
2130                         }
2131                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
2132                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2133                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2134                                 mono_regstate_free_int (rs, clob_reg);
2135                         }
2136                         src1_mask = 1 << X86_EAX;
2137                         src2_mask = 1 << X86_ECX;
2138                 } else if (spec [MONO_INST_DEST] == 'l') {
2139                         int hreg;
2140                         val = rs->iassign [ins->dreg];
2141                         /* check special case when dreg have been moved from ecx (clob shift) */
2142                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2143                                 hreg = clob_dreg + 1;
2144                         else
2145                                 hreg = ins->dreg + 1;
2146
2147                         /* base prev_dreg on fixed hreg, handle clob case */
2148                         val = hreg - 1;
2149
2150                         if (val != rs->isymbolic [X86_EAX] && !(rs->ifree_mask & (1 << X86_EAX))) {
2151                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [X86_EAX]));
2152                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2153                                 mono_regstate_free_int (rs, X86_EAX);
2154                         }
2155                         if (hreg != rs->isymbolic [X86_EDX] && !(rs->ifree_mask & (1 << X86_EDX))) {
2156                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [X86_EDX]));
2157                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
2158                                 mono_regstate_free_int (rs, X86_EDX);
2159                         }
2160                 } else if (spec [MONO_INST_CLOB] == 'b') {
2161                         /*
2162                          * x86_set_reg instructions, dreg needs to be EAX..EDX
2163                          */     
2164                         dest_mask = (1 << X86_EAX) | (1 << X86_EBX) | (1 << X86_ECX) | (1 << X86_EDX);
2165                         if ((ins->dreg < MONO_MAX_IREGS) && (! (dest_mask & (1 << ins->dreg)))) {
2166                                 /* 
2167                                  * ins->dreg is already a hard reg, need to allocate another
2168                                  * suitable hard reg and make a copy.
2169                                  */
2170                                 int new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2171                                 g_assert (new_dest >= 0);
2172
2173                                 create_copy_ins (cfg, ins->dreg, new_dest, ins);
2174                                 DEBUG (g_print ("\tclob:b changing dreg R%d to %s\n", ins->dreg, mono_arch_regname (new_dest)));
2175                                 ins->dreg = new_dest;
2176
2177                                 /* The hard reg is no longer needed */
2178                                 mono_regstate_free_int (rs, new_dest);
2179                         }
2180                 }
2181
2182                 /*
2183                  * TRACK DREG
2184                  */
2185                 if (spec [MONO_INST_DEST] == 'f') {
2186                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
2187                                 GList *spill_node;
2188                                 MonoInst *store;
2189                                 spill_node = g_list_first (fspill_list);
2190                                 g_assert (spill_node);
2191
2192                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
2193                                 insert_before_ins (ins, tmp, store);
2194                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
2195                                 fspill--;
2196                         }
2197                 } else if (spec [MONO_INST_DEST] == 'L') {
2198                         int hreg;
2199                         val = rs->iassign [ins->dreg];
2200                         /* check special case when dreg have been moved from ecx (clob shift) */
2201                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2202                                 hreg = clob_dreg + 1;
2203                         else
2204                                 hreg = ins->dreg + 1;
2205
2206                         /* base prev_dreg on fixed hreg, handle clob case */
2207                         prev_dreg = hreg - 1;
2208
2209                         if (val < 0) {
2210                                 int spill = 0;
2211                                 if (val < -1) {
2212                                         /* the register gets spilled after this inst */
2213                                         spill = -val -1;
2214                                 }
2215                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2216                                 rs->iassign [ins->dreg] = val;
2217                                 if (spill)
2218                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2219                         }
2220
2221                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
2222  
2223                         rs->isymbolic [val] = hreg - 1;
2224                         ins->dreg = val;
2225                         
2226                         val = rs->iassign [hreg];
2227                         if (val < 0) {
2228                                 int spill = 0;
2229                                 if (val < -1) {
2230                                         /* the register gets spilled after this inst */
2231                                         spill = -val -1;
2232                                 }
2233                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2234                                 rs->iassign [hreg] = val;
2235                                 if (spill)
2236                                         create_spilled_store (cfg, spill, val, hreg, ins);
2237                         }
2238
2239                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
2240                         rs->isymbolic [val] = hreg;
2241                         /* save reg allocating into unused */
2242                         ins->unused = val;
2243
2244                         /* check if we can free our long reg */
2245                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2246                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
2247                                 mono_regstate_free_int (rs, val);
2248                         }
2249                 }
2250                 else if (ins->dreg >= MONO_MAX_IREGS) {
2251                         int hreg;
2252                         val = rs->iassign [ins->dreg];
2253                         if (spec [MONO_INST_DEST] == 'l') {
2254                                 /* check special case when dreg have been moved from ecx (clob shift) */
2255                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
2256                                         hreg = clob_dreg + 1;
2257                                 else
2258                                         hreg = ins->dreg + 1;
2259
2260                                 /* base prev_dreg on fixed hreg, handle clob case */
2261                                 prev_dreg = hreg - 1;
2262                         } else
2263                                 prev_dreg = ins->dreg;
2264
2265                         if (val < 0) {
2266                                 int spill = 0;
2267                                 if (val < -1) {
2268                                         /* the register gets spilled after this inst */
2269                                         spill = -val -1;
2270                                 }
2271                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
2272                                 rs->iassign [ins->dreg] = val;
2273                                 if (spill)
2274                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
2275                         }
2276                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
2277                         rs->isymbolic [val] = prev_dreg;
2278                         ins->dreg = val;
2279                         /* handle cases where lreg needs to be eax:edx */
2280                         if (spec [MONO_INST_DEST] == 'l') {
2281                                 /* check special case when dreg have been moved from ecx (clob shift) */
2282                                 int hreg = prev_dreg + 1;
2283                                 val = rs->iassign [hreg];
2284                                 if (val < 0) {
2285                                         int spill = 0;
2286                                         if (val < -1) {
2287                                                 /* the register gets spilled after this inst */
2288                                                 spill = -val -1;
2289                                         }
2290                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
2291                                         rs->iassign [hreg] = val;
2292                                         if (spill)
2293                                                 create_spilled_store (cfg, spill, val, hreg, ins);
2294                                 }
2295                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
2296                                 rs->isymbolic [val] = hreg;
2297                                 if (ins->dreg == X86_EAX) {
2298                                         if (val != X86_EDX)
2299                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2300                                 } else if (ins->dreg == X86_EDX) {
2301                                         if (val == X86_EAX) {
2302                                                 /* swap */
2303                                                 g_assert_not_reached ();
2304                                         } else {
2305                                                 /* two forced copies */
2306                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2307                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2308                                         }
2309                                 } else {
2310                                         if (val == X86_EDX) {
2311                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2312                                         } else {
2313                                                 /* two forced copies */
2314                                                 create_copy_ins (cfg, val, X86_EDX, ins);
2315                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2316                                         }
2317                                 }
2318                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
2319                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
2320                                         mono_regstate_free_int (rs, val);
2321                                 }
2322                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
2323                                 /* this instruction only outputs to EAX, need to copy */
2324                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
2325                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
2326                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
2327                         }
2328                 }
2329                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
2330                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
2331                         mono_regstate_free_int (rs, ins->dreg);
2332                 }
2333                 /* put src1 in EAX if it needs to be */
2334                 if (spec [MONO_INST_SRC1] == 'a') {
2335                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
2336                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
2337                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
2338                                 mono_regstate_free_int (rs, X86_EAX);
2339                         }
2340                         if (ins->sreg1 < MONO_MAX_IREGS) {
2341                                 /* The argument is already in a hard reg, need to copy */
2342                                 MonoInst *copy = create_copy_ins (cfg, X86_EAX, ins->sreg1, NULL);
2343                                 insert_before_ins (ins, tmp, copy);
2344                         }
2345                         else
2346                                 /* force-set sreg1 */
2347                                 assign_ireg (rs, ins->sreg1, X86_EAX);
2348                         ins->sreg1 = X86_EAX;
2349                 }
2350
2351                 /*
2352                  * TRACK SREG1
2353                  */
2354                 if (spec [MONO_INST_SRC1] == 'f') {
2355                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
2356                                 MonoInst *load;
2357                                 MonoInst *store = NULL;
2358
2359                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2360                                         GList *spill_node;
2361                                         spill_node = g_list_first (fspill_list);
2362                                         g_assert (spill_node);
2363
2364                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
2365                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2366                                 }
2367
2368                                 fspill++;
2369                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2370                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
2371                                 insert_before_ins (ins, tmp, load);
2372                                 if (store) 
2373                                         insert_before_ins (load, tmp, store);
2374                         }
2375                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
2376                         /* force source to be same as dest */
2377                         assign_ireg (rs, ins->sreg1, ins->dreg);
2378                         assign_ireg (rs, ins->sreg1 + 1, ins->unused);
2379
2380                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
2381                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
2382
2383                         ins->sreg1 = ins->dreg;
2384                         /* 
2385                          * No need for saving the reg, we know that src1=dest in this cases
2386                          * ins->inst_c0 = ins->unused;
2387                          */
2388                 }
2389                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
2390                         val = rs->iassign [ins->sreg1];
2391                         prev_sreg1 = ins->sreg1;
2392                         if (val < 0) {
2393                                 int spill = 0;
2394                                 if (val < -1) {
2395                                         /* the register gets spilled after this inst */
2396                                         spill = -val -1;
2397                                 }
2398                                 if (0 && ins->opcode == OP_MOVE) {
2399                                         /* 
2400                                          * small optimization: the dest register is already allocated
2401                                          * but the src one is not: we can simply assign the same register
2402                                          * here and peephole will get rid of the instruction later.
2403                                          * This optimization may interfere with the clobbering handling:
2404                                          * it removes a mov operation that will be added again to handle clobbering.
2405                                          * There are also some other issues that should with make testjit.
2406                                          */
2407                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2408                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2409                                         //g_assert (val >= 0);
2410                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2411                                 } else {
2412                                         //g_assert (val == -1); /* source cannot be spilled */
2413                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2414                                         rs->iassign [ins->sreg1] = val;
2415                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2416                                 }
2417                                 if (spill) {
2418                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2419                                         insert_before_ins (ins, tmp, store);
2420                                 }
2421                         }
2422                         rs->isymbolic [val] = prev_sreg1;
2423                         ins->sreg1 = val;
2424                 } else {
2425                         prev_sreg1 = -1;
2426                 }
2427                 /* handle clobbering of sreg1 */
2428                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2429                         MonoInst *sreg2_copy = NULL;
2430                         MonoInst *copy = NULL;
2431
2432                         if (ins->dreg == ins->sreg2) {
2433                                 /* 
2434                                  * copying sreg1 to dreg could clobber sreg2, so allocate a new
2435                                  * register for it.
2436                                  */
2437                                 int reg2 = 0;
2438
2439                                 reg2 = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->sreg2, 0);
2440
2441                                 DEBUG (g_print ("\tneed to copy sreg2 %s to reg %s\n", mono_arch_regname (ins->sreg2), mono_arch_regname (reg2)));
2442                                 sreg2_copy = create_copy_ins (cfg, reg2, ins->sreg2, NULL);
2443                                 prev_sreg2 = ins->sreg2 = reg2;
2444
2445                                 mono_regstate_free_int (rs, reg2);
2446                         }
2447
2448                         copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2449                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2450                         insert_before_ins (ins, tmp, copy);
2451
2452                         if (sreg2_copy)
2453                                 insert_before_ins (copy, tmp, sreg2_copy);
2454
2455                         /*
2456                          * Need to prevent sreg2 to be allocated to sreg1, since that
2457                          * would screw up the previous copy.
2458                          */
2459                         src2_mask &= ~ (1 << ins->sreg1);
2460                         /* we set sreg1 to dest as well */
2461                         prev_sreg1 = ins->sreg1 = ins->dreg;
2462                         src2_mask &= ~ (1 << ins->dreg);
2463                 }
2464
2465                 /*
2466                  * TRACK SREG2
2467                  */
2468                 if (spec [MONO_INST_SRC2] == 'f') {
2469                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2470                                 MonoInst *load;
2471                                 MonoInst *store = NULL;
2472
2473                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2474                                         GList *spill_node;
2475
2476                                         spill_node = g_list_first (fspill_list);
2477                                         g_assert (spill_node);
2478                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2479                                                 spill_node = g_list_next (spill_node);
2480         
2481                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2482                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2483                                 } 
2484                                 
2485                                 fspill++;
2486                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2487                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2488                                 insert_before_ins (ins, tmp, load);
2489                                 if (store) 
2490                                         insert_before_ins (load, tmp, store);
2491                         }
2492                 } 
2493                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2494                         val = rs->iassign [ins->sreg2];
2495                         prev_sreg2 = ins->sreg2;
2496                         if (val < 0) {
2497                                 int spill = 0;
2498                                 if (val < -1) {
2499                                         /* the register gets spilled after this inst */
2500                                         spill = -val -1;
2501                                 }
2502                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2503                                 rs->iassign [ins->sreg2] = val;
2504                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2505                                 if (spill)
2506                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2507                         }
2508                         rs->isymbolic [val] = prev_sreg2;
2509                         ins->sreg2 = val;
2510                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
2511                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
2512                         }
2513                 } else {
2514                         prev_sreg2 = -1;
2515                 }
2516
2517                 if (spec [MONO_INST_CLOB] == 'c') {
2518                         int j, s;
2519                         guint32 clob_mask = X86_CALLEE_REGS;
2520                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2521                                 s = 1 << j;
2522                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2523                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2524                                 }
2525                         }
2526                 }
2527                 if (spec [MONO_INST_CLOB] == 'a') {
2528                         guint32 clob_reg = X86_EAX;
2529                         if (!(rs->ifree_mask & (1 << clob_reg)) && (rs->isymbolic [clob_reg] >= 8)) {
2530                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2531                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2532                                 mono_regstate_free_int (rs, clob_reg);
2533                         }
2534                 }
2535                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2536                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2537                         mono_regstate_free_int (rs, ins->sreg1);
2538                 }
2539                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2540                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2541                         mono_regstate_free_int (rs, ins->sreg2);
2542                 }*/
2543         
2544                 //DEBUG (print_ins (i, ins));
2545                 /* this may result from a insert_before call */
2546                 if (!tmp->next)
2547                         bb->code = tmp->data;
2548                 tmp = tmp->next;
2549         }
2550
2551         g_free (reginfo);
2552         g_free (reginfof);
2553         g_list_free (fspill_list);
2554 }
2555
/*
 * emit_float_to_int:
 *
 *   Emit code converting the value on top of the x87 FP stack to an
 * integer of SIZE bytes (1, 2, 4 or 8) placed in DREG, truncating
 * towards zero. The x87 default rounding mode is round-to-nearest, so
 * the FPU control word is temporarily modified (rounding-control bits
 * set to 11b = truncate) and restored afterwards. Returns the updated
 * native code pointer.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
	/* Save the current FPU control word in a fresh stack slot */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	/* Build a copy at [esp+2] with the RC bits (0xc00) forced to 'truncate' and load it */
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		/* 64 bit conversion: fistp stores a qword */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register 
		 * x86_pop_reg (code, dreg_high);
		 */
		/*
		 * NOTE(review): with the high-word pop commented out, this path
		 * leaves 4 extra bytes on the stack, so the control-word reload
		 * below reads the wrong slot and ESP ends up unbalanced —
		 * presumably this path is never taken; confirm before relying on it.
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	/* Restore the saved control word and release its stack slot */
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	/* For sub-dword results, sign/zero-extend to the full 32 bit register */
	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
2586
/*
 * mono_emit_stack_alloc:
 *
 *   Emit native code subtracting the byte count held in TREE->sreg1
 * from ESP (localloc). On Windows the allocation is performed one page
 * (0x1000 bytes) at a time, touching each new page so the guard-page
 * mechanism can commit more stack; elsewhere a single SUB suffices.
 * If MONO_INST_INIT is set, the freshly allocated area is zeroed with
 * a "rep stosl". Returns the updated native code pointer.
 */
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
#ifdef PLATFORM_WIN32
	guint8* br[5];

	/*
	 * Under Windows:
	 * If requested stack size is larger than one page,
	 * perform stack-touch operation
	 */
	/*
	 * Generate stack probe code.
	 * Under Windows, it is necessary to allocate one page at a time,
	 * "touching" stack after each successful sub-allocation. This is
	 * because of the way stack growth is implemented - there is a
	 * guard page before the lowest stack page that is currently commited.
	 * Stack normally grows sequentially so OS traps access to the
	 * guard page and commits more pages when needed.
	 */
	/* Skip the probe loop entirely when the size fits in one page (< 4K) */
	x86_test_reg_imm (code, sreg, ~0xFFF);
	br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

	br[2] = code; /* loop */
	/* Allocate one page and touch it to trigger the guard page */
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
	x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
	x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
	/* Loop while at least one full page remains to be allocated */
	x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
	br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
	x86_patch (br[3], br[2]);
	/* Allocate the sub-page remainder, if any */
	x86_test_reg_reg (code, sreg, sreg);
	br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
	x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

	br[1] = code; x86_jump8 (code, 0);

	/* Fast path: size < one page, a single SUB is enough */
	x86_patch (br[0], code);
	x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
	x86_patch (br[1], code);
	x86_patch (br[4], code);
#else /* PLATFORM_WIN32 */
	x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
#endif
	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		/* Save EAX/ECX/EDI unless they hold the size (sreg) or the result (dreg);
		 * 'offset' tracks how far ESP moved so the lea below still finds the area */
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}
		
		/* Set up rep stosl: ECX = size in dwords, EAX = 0, EDI = start of area */
		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
				
		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);
		
		/* Restore the saved registers (reverse order of the pushes) */
		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
2665
2666
2667 static guint8*
2668 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2669 {
2670         CallInfo *cinfo;
2671         int quad;
2672
2673         /* Move return value to the target register */
2674         switch (ins->opcode) {
2675         case CEE_CALL:
2676         case OP_CALL_REG:
2677         case OP_CALL_MEMBASE:
2678                 if (ins->dreg != X86_EAX)
2679                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2680                 break;
2681         case OP_VCALL:
2682         case OP_VCALL_REG:
2683         case OP_VCALL_MEMBASE:
2684                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
2685                 if (cinfo->ret.storage == ArgValuetypeInReg) {
2686                         /* Pop the destination address from the stack */
2687                         x86_pop_reg (code, X86_ECX);
2688                         
2689                         for (quad = 0; quad < 2; quad ++) {
2690                                 switch (cinfo->ret.pair_storage [quad]) {
2691                                 case ArgInIReg:
2692                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
2693                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
2694                                         break;
2695                                 case ArgNone:
2696                                         break;
2697                                 default:
2698                                         g_assert_not_reached ();
2699                                 }
2700                         }
2701                 }
2702                 g_free (cinfo);
2703         default:
2704                 break;
2705         }
2706
2707         return code;
2708 }
2709
/*
 * emit_tls_get:
 *
 *   Emit code that loads the value of the TLS slot TLS_OFFSET into register
 * DREG.  Returns the updated native code pointer.
 */
static guint8*
emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/* 
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	/* Only the TEB's inline 64-entry TlsSlots array is handled here */
	g_assert (tls_offset < 64);
	x86_prefix (code, X86_FS_PREFIX);
	/* fs:[0x18] holds the linear address of the current thread's TEB */
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	/* NOTE(review): presumably clears TEB LastErrorValue (offset 0x34) the
	 * way TlsGetValue() does on success — confirm against a disassembly */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	/* 3600 == 0xE10, the offset of the TlsSlots array inside the TEB */
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	/* Elsewhere, tls_offset is a direct offset from the gs: segment base */
	x86_prefix (code, X86_GS_PREFIX);
	x86_mov_reg_mem (code, dreg, tls_offset, 4);			
#endif
	return code;
}
2730
/*
 * REAL_PRINT_REG: debug helper. Emits code which, at runtime, calls
 * printf (text " %d %p\n", reg, <value of reg>) — i.e. it prints the
 * register number and its current contents.  The caller-saved registers
 * EAX, EDX and ECX are pushed/popped around the call; the three printf
 * arguments are removed afterwards with add esp, 12.
 * NOTE(review): this is a multi-statement macro NOT wrapped in
 * do { } while (0) — only use it where a compound statement is legal.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); /* save caller-saved regs */ \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); /* arg 3: register contents (%p) */ \
x86_push_imm (code, reg); /* arg 2: register number (%d) */ \
x86_push_imm (code, text " %d %p\n"); /* arg 1: format string */ \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); /* pop the 3 args */ \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
2745
/* Code alignment (bytes) applied to loop-header blocks; benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
/* A block is a loop start if it begins a loop body and sits inside a loop nest */
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2749
2750 void
2751 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2752 {
2753         MonoInst *ins;
2754         MonoCallInst *call;
2755         guint offset;
2756         guint8 *code = cfg->native_code + cfg->code_len;
2757         MonoInst *last_ins = NULL;
2758         guint last_offset = 0;
2759         int max_len, cpos;
2760
2761         if (cfg->opt & MONO_OPT_PEEPHOLE)
2762                 peephole_pass (cfg, bb);
2763
2764         if (cfg->opt & MONO_OPT_LOOP) {
2765                 int pad, align = LOOP_ALIGNMENT;
2766                 /* set alignment depending on cpu */
2767                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2768                         pad = align - pad;
2769                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2770                         x86_padding (code, pad);
2771                         cfg->code_len += pad;
2772                         bb->native_offset = cfg->code_len;
2773                 }
2774         }
2775
2776         if (cfg->verbose_level > 2)
2777                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2778
2779         cpos = bb->max_offset;
2780
2781         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2782                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2783                 g_assert (!cfg->compile_aot);
2784                 cpos += 6;
2785
2786                 cov->data [bb->dfn].cil_code = bb->cil_code;
2787                 /* this is not thread save, but good enough */
2788                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2789         }
2790
2791         offset = code - cfg->native_code;
2792
2793         ins = bb->code;
2794         while (ins) {
2795                 offset = code - cfg->native_code;
2796
2797                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2798
2799                 if (offset > (cfg->code_size - max_len - 16)) {
2800                         cfg->code_size *= 2;
2801                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2802                         code = cfg->native_code + offset;
2803                         mono_jit_stats.code_reallocs++;
2804                 }
2805
2806                 mono_debug_record_line_number (cfg, ins, offset);
2807
2808                 switch (ins->opcode) {
2809                 case OP_BIGMUL:
2810                         x86_mul_reg (code, ins->sreg2, TRUE);
2811                         break;
2812                 case OP_BIGMUL_UN:
2813                         x86_mul_reg (code, ins->sreg2, FALSE);
2814                         break;
2815                 case OP_X86_SETEQ_MEMBASE:
2816                 case OP_X86_SETNE_MEMBASE:
2817                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2818                                          ins->inst_basereg, ins->inst_offset, TRUE);
2819                         break;
2820                 case OP_STOREI1_MEMBASE_IMM:
2821                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2822                         break;
2823                 case OP_STOREI2_MEMBASE_IMM:
2824                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2825                         break;
2826                 case OP_STORE_MEMBASE_IMM:
2827                 case OP_STOREI4_MEMBASE_IMM:
2828                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2829                         break;
2830                 case OP_STOREI1_MEMBASE_REG:
2831                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2832                         break;
2833                 case OP_STOREI2_MEMBASE_REG:
2834                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2835                         break;
2836                 case OP_STORE_MEMBASE_REG:
2837                 case OP_STOREI4_MEMBASE_REG:
2838                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2839                         break;
2840                 case CEE_LDIND_I:
2841                 case CEE_LDIND_I4:
2842                 case CEE_LDIND_U4:
2843                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2844                         break;
2845                 case OP_LOADU4_MEM:
2846                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2847                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2848                         break;
2849                 case OP_LOAD_MEMBASE:
2850                 case OP_LOADI4_MEMBASE:
2851                 case OP_LOADU4_MEMBASE:
2852                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2853                         break;
2854                 case OP_LOADU1_MEMBASE:
2855                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2856                         break;
2857                 case OP_LOADI1_MEMBASE:
2858                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2859                         break;
2860                 case OP_LOADU2_MEMBASE:
2861                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2862                         break;
2863                 case OP_LOADI2_MEMBASE:
2864                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2865                         break;
2866                 case CEE_CONV_I1:
2867                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2868                         break;
2869                 case CEE_CONV_I2:
2870                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2871                         break;
2872                 case CEE_CONV_U1:
2873                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2874                         break;
2875                 case CEE_CONV_U2:
2876                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2877                         break;
2878                 case OP_COMPARE:
2879                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2880                         break;
2881                 case OP_COMPARE_IMM:
2882                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2883                         break;
2884                 case OP_X86_COMPARE_MEMBASE_REG:
2885                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2886                         break;
2887                 case OP_X86_COMPARE_MEMBASE_IMM:
2888                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2889                         break;
2890                 case OP_X86_COMPARE_MEMBASE8_IMM:
2891                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2892                         break;
2893                 case OP_X86_COMPARE_REG_MEMBASE:
2894                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2895                         break;
2896                 case OP_X86_COMPARE_MEM_IMM:
2897                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2898                         break;
2899                 case OP_X86_TEST_NULL:
2900                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2901                         break;
2902                 case OP_X86_ADD_MEMBASE_IMM:
2903                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2904                         break;
2905                 case OP_X86_ADD_MEMBASE:
2906                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2907                         break;
2908                 case OP_X86_SUB_MEMBASE_IMM:
2909                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2910                         break;
2911                 case OP_X86_SUB_MEMBASE:
2912                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2913                         break;
2914                 case OP_X86_INC_MEMBASE:
2915                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2916                         break;
2917                 case OP_X86_INC_REG:
2918                         x86_inc_reg (code, ins->dreg);
2919                         break;
2920                 case OP_X86_DEC_MEMBASE:
2921                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2922                         break;
2923                 case OP_X86_DEC_REG:
2924                         x86_dec_reg (code, ins->dreg);
2925                         break;
2926                 case OP_X86_MUL_MEMBASE:
2927                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2928                         break;
2929                 case CEE_BREAK:
2930                         x86_breakpoint (code);
2931                         break;
2932                 case OP_ADDCC:
2933                 case CEE_ADD:
2934                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2935                         break;
2936                 case OP_ADC:
2937                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2938                         break;
2939                 case OP_ADDCC_IMM:
2940                 case OP_ADD_IMM:
2941                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2942                         break;
2943                 case OP_ADC_IMM:
2944                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2945                         break;
2946                 case OP_SUBCC:
2947                 case CEE_SUB:
2948                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2949                         break;
2950                 case OP_SBB:
2951                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2952                         break;
2953                 case OP_SUBCC_IMM:
2954                 case OP_SUB_IMM:
2955                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2956                         break;
2957                 case OP_SBB_IMM:
2958                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2959                         break;
2960                 case CEE_AND:
2961                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2962                         break;
2963                 case OP_AND_IMM:
2964                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2965                         break;
2966                 case CEE_DIV:
2967                         x86_cdq (code);
2968                         x86_div_reg (code, ins->sreg2, TRUE);
2969                         break;
2970                 case CEE_DIV_UN:
2971                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2972                         x86_div_reg (code, ins->sreg2, FALSE);
2973                         break;
2974                 case OP_DIV_IMM:
2975                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2976                         x86_cdq (code);
2977                         x86_div_reg (code, ins->sreg2, TRUE);
2978                         break;
2979                 case CEE_REM:
2980                         x86_cdq (code);
2981                         x86_div_reg (code, ins->sreg2, TRUE);
2982                         break;
2983                 case CEE_REM_UN:
2984                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2985                         x86_div_reg (code, ins->sreg2, FALSE);
2986                         break;
2987                 case OP_REM_IMM:
2988                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2989                         x86_cdq (code);
2990                         x86_div_reg (code, ins->sreg2, TRUE);
2991                         break;
2992                 case CEE_OR:
2993                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2994                         break;
2995                 case OP_OR_IMM:
2996                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2997                         break;
2998                 case CEE_XOR:
2999                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
3000                         break;
3001                 case OP_XOR_IMM:
3002                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
3003                         break;
3004                 case CEE_SHL:
3005                         g_assert (ins->sreg2 == X86_ECX);
3006                         x86_shift_reg (code, X86_SHL, ins->dreg);
3007                         break;
3008                 case CEE_SHR:
3009                         g_assert (ins->sreg2 == X86_ECX);
3010                         x86_shift_reg (code, X86_SAR, ins->dreg);
3011                         break;
3012                 case OP_SHR_IMM:
3013                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
3014                         break;
3015                 case OP_SHR_UN_IMM:
3016                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
3017                         break;
3018                 case CEE_SHR_UN:
3019                         g_assert (ins->sreg2 == X86_ECX);
3020                         x86_shift_reg (code, X86_SHR, ins->dreg);
3021                         break;
3022                 case OP_SHL_IMM:
3023                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
3024                         break;
3025                 case OP_LSHL: {
3026                         guint8 *jump_to_end;
3027
3028                         /* handle shifts below 32 bits */
3029                         x86_shld_reg (code, ins->unused, ins->sreg1);
3030                         x86_shift_reg (code, X86_SHL, ins->sreg1);
3031
3032                         x86_test_reg_imm (code, X86_ECX, 32);
3033                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3034
3035                         /* handle shift over 32 bit */
3036                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3037                         x86_clear_reg (code, ins->sreg1);
3038                         
3039                         x86_patch (jump_to_end, code);
3040                         }
3041                         break;
3042                 case OP_LSHR: {
3043                         guint8 *jump_to_end;
3044
3045                         /* handle shifts below 32 bits */
3046                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3047                         x86_shift_reg (code, X86_SAR, ins->unused);
3048
3049                         x86_test_reg_imm (code, X86_ECX, 32);
3050                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3051
3052                         /* handle shifts over 31 bits */
3053                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3054                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
3055                         
3056                         x86_patch (jump_to_end, code);
3057                         }
3058                         break;
3059                 case OP_LSHR_UN: {
3060                         guint8 *jump_to_end;
3061
3062                         /* handle shifts below 32 bits */
3063                         x86_shrd_reg (code, ins->sreg1, ins->unused);
3064                         x86_shift_reg (code, X86_SHR, ins->unused);
3065
3066                         x86_test_reg_imm (code, X86_ECX, 32);
3067                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
3068
3069                         /* handle shifts over 31 bits */
3070                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3071                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
3072                         
3073                         x86_patch (jump_to_end, code);
3074                         }
3075                         break;
3076                 case OP_LSHL_IMM:
3077                         if (ins->inst_imm >= 32) {
3078                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
3079                                 x86_clear_reg (code, ins->sreg1);
3080                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
3081                         } else {
3082                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
3083                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
3084                         }
3085                         break;
3086                 case OP_LSHR_IMM:
3087                         if (ins->inst_imm >= 32) {
3088                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
3089                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
3090                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
3091                         } else {
3092                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3093                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
3094                         }
3095                         break;
3096                 case OP_LSHR_UN_IMM:
3097                         if (ins->inst_imm >= 32) {
3098                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
3099                                 x86_clear_reg (code, ins->unused);
3100                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
3101                         } else {
3102                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
3103                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
3104                         }
3105                         break;
3106                 case CEE_NOT:
3107                         x86_not_reg (code, ins->sreg1);
3108                         break;
3109                 case CEE_NEG:
3110                         x86_neg_reg (code, ins->sreg1);
3111                         break;
3112                 case OP_SEXT_I1:
3113                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
3114                         break;
3115                 case OP_SEXT_I2:
3116                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
3117                         break;
3118                 case CEE_MUL:
3119                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3120                         break;
3121                 case OP_MUL_IMM:
3122                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
3123                         break;
3124                 case CEE_MUL_OVF:
3125                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
3126                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3127                         break;
3128                 case CEE_MUL_OVF_UN: {
3129                         /* the mul operation and the exception check should most likely be split */
3130                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
3131                         /*g_assert (ins->sreg2 == X86_EAX);
3132                         g_assert (ins->dreg == X86_EAX);*/
3133                         if (ins->sreg2 == X86_EAX) {
3134                                 non_eax_reg = ins->sreg1;
3135                         } else if (ins->sreg1 == X86_EAX) {
3136                                 non_eax_reg = ins->sreg2;
3137                         } else {
3138                                 /* no need to save since we're going to store to it anyway */
3139                                 if (ins->dreg != X86_EAX) {
3140                                         saved_eax = TRUE;
3141                                         x86_push_reg (code, X86_EAX);
3142                                 }
3143                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
3144                                 non_eax_reg = ins->sreg2;
3145                         }
3146                         if (ins->dreg == X86_EDX) {
3147                                 if (!saved_eax) {
3148                                         saved_eax = TRUE;
3149                                         x86_push_reg (code, X86_EAX);
3150                                 }
3151                         } else if (ins->dreg != X86_EAX) {
3152                                 saved_edx = TRUE;
3153                                 x86_push_reg (code, X86_EDX);
3154                         }
3155                         x86_mul_reg (code, non_eax_reg, FALSE);
3156                         /* save before the check since pop and mov don't change the flags */
3157                         if (ins->dreg != X86_EAX)
3158                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3159                         if (saved_edx)
3160                                 x86_pop_reg (code, X86_EDX);
3161                         if (saved_eax)
3162                                 x86_pop_reg (code, X86_EAX);
3163                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
3164                         break;
3165                 }
3166                 case OP_ICONST:
3167                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
3168                         break;
3169                 case OP_AOTCONST:
3170                         g_assert_not_reached ();
3171                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
3172                         x86_mov_reg_imm (code, ins->dreg, 0);
3173                         break;
3174                 case OP_LOAD_GOTADDR:
3175                         x86_call_imm (code, 0);
3176                         /* 
3177                          * The patch needs to point to the pop, since the GOT offset needs 
3178                          * to be added to that address.
3179                          */
3180                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3181                         x86_pop_reg (code, ins->dreg);
3182                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
3183                         break;
3184                 case OP_GOT_ENTRY:
3185                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3186                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
3187                         break;
3188                 case OP_X86_PUSH_GOT_ENTRY:
3189                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
3190                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
3191                         break;
3192                 case CEE_CONV_I4:
3193                 case OP_MOVE:
3194                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3195                         break;
3196                 case CEE_CONV_U4:
3197                         g_assert_not_reached ();
3198                 case CEE_JMP: {
3199                         /*
3200                          * Note: this 'frame destruction' logic is useful for tail calls, too.
3201                          * Keep in sync with the code in emit_epilog.
3202                          */
3203                         int pos = 0;
3204
3205                         /* FIXME: no tracing support... */
3206                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3207                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
3208                         /* reset offset to make max_len work */
3209                         offset = code - cfg->native_code;
3210
3211                         g_assert (!cfg->method->save_lmf);
3212
3213                         if (cfg->used_int_regs & (1 << X86_EBX))
3214                                 pos -= 4;
3215                         if (cfg->used_int_regs & (1 << X86_EDI))
3216                                 pos -= 4;
3217                         if (cfg->used_int_regs & (1 << X86_ESI))
3218                                 pos -= 4;
3219                         if (pos)
3220                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3221         
3222                         if (cfg->used_int_regs & (1 << X86_ESI))
3223                                 x86_pop_reg (code, X86_ESI);
3224                         if (cfg->used_int_regs & (1 << X86_EDI))
3225                                 x86_pop_reg (code, X86_EDI);
3226                         if (cfg->used_int_regs & (1 << X86_EBX))
3227                                 x86_pop_reg (code, X86_EBX);
3228         
3229                         /* restore ESP/EBP */
3230                         x86_leave (code);
3231                         offset = code - cfg->native_code;
3232                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
3233                         x86_jump32 (code, 0);
3234                         break;
3235                 }
3236                 case OP_CHECK_THIS:
3237                         /* ensure ins->sreg1 is not NULL
3238                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
3239                          * cmp DWORD PTR [eax], 0
3240                          */
3241                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
3242                         break;
3243                 case OP_ARGLIST: {
3244                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
3245                         x86_push_reg (code, hreg);
3246                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
3247                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
3248                         x86_pop_reg (code, hreg);
3249                         break;
3250                 }
		case OP_FCALL:
		case OP_LCALL:
		case OP_VCALL:
		case OP_VOIDCALL:
		case CEE_CALL:
			/* Direct call to a known target: either a managed method
			 * (resolved later through a MONO_PATCH_INFO_METHOD patch) or an
			 * absolute native address stored in call->fptr. */
			call = (MonoCallInst*)ins;
			if (ins->flags & MONO_INST_HAS_METHOD)
				code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
			/* For caller-pops conventions, remove the outgoing argument area
			 * after the call returns (stdcall is callee-pops, so skip it). */
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				/* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
				 * bytes to pop, we want to use pops. GCC does this (note it won't happen
				 * for P4 or i686 because gcc will avoid using pop push at all. But we aren't
				 * smart enough to do that optimization yet
				 *
				 * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
				 * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
				 * speedup (most likely from locality benefits). People with other processors
				 * should check on theirs to see what happens.
				 */
				if (call->stack_usage == 4) {
					/* we want to use registers that won't get used soon, so use
					 * ecx, as eax will get allocated first. edx is used by long calls,
					 * so we can't use that.
					 */
					
					x86_pop_reg (code, X86_ECX);
				} else {
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
				}
			}
			/* Move the return value from EAX/EDX:EAX/x87 top into the dreg. */
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_REG:
		case OP_LCALL_REG:
		case OP_VCALL_REG:
		case OP_VOIDCALL_REG:
		case OP_CALL_REG:
			/* Indirect call through the address held in sreg1. */
			call = (MonoCallInst*)ins;
			x86_call_reg (code, ins->sreg1);
			/* Caller-pops cleanup; a single pop is smaller than an add for a
			 * 4 byte argument area (see the comment in the direct-call case). */
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				if (call->stack_usage == 4)
					x86_pop_reg (code, X86_ECX);
				else
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_MEMBASE:
		case OP_LCALL_MEMBASE:
		case OP_VCALL_MEMBASE:
		case OP_VOIDCALL_MEMBASE:
		case OP_CALL_MEMBASE:
			/* Indirect call through [sreg1 + inst_offset], e.g. a vtable slot. */
			call = (MonoCallInst*)ins;
			x86_call_membase (code, ins->sreg1, ins->inst_offset);
			/* Same caller-pops cleanup as above. */
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
				if (call->stack_usage == 4)
					x86_pop_reg (code, X86_ECX);
				else
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
			}
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_OUTARG:
		case OP_X86_PUSH:
			/* Push a register (used to pass outgoing call arguments). */
			x86_push_reg (code, ins->sreg1);
			break;
		case OP_X86_PUSH_IMM:
			x86_push_imm (code, ins->inst_imm);
			break;
		case OP_X86_PUSH_MEMBASE:
			x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_PUSH_OBJ: 
			/* Push a valuetype of inst_imm bytes onto the stack by copying it
			 * with rep movsd. EDI/ESI/ECX are clobbered by the string copy, so
			 * save and restore them around it. */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
			x86_push_reg (code, X86_EDI);
			x86_push_reg (code, X86_ESI);
			x86_push_reg (code, X86_ECX);
			/* ESI = source address of the valuetype */
			if (ins->inst_offset)
				x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
			else
				x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
			/* EDI = destination: the area reserved above, which sits 12 bytes
			 * up from ESP because of the three register pushes. */
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			/* ECX = number of 4-byte words to copy */
			x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_movsd (code);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_ESI);
			x86_pop_reg (code, X86_EDI);
			break;
		case OP_X86_LEA:
			/* dreg = sreg1 + inst_imm + sreg2 << unused (scale in ins->unused) */
			x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
			break;
		case OP_X86_LEA_MEMBASE:
			x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
			break;
		case OP_X86_XCHG:
			x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_LOCALLOC:
			/* sreg1 holds the requested size; round it up to the frame
			 * alignment before adjusting the stack. */
			/* keep alignment */
			x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
			x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
			/* mono_emit_stack_alloc performs the actual ESP adjustment
			 * (and any required stack touching/initialization). */
			code = mono_emit_stack_alloc (code, ins);
			/* The result is the new stack pointer. */
			x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
			break;
		case CEE_RET:
			x86_ret (code);
			break;
		case CEE_THROW: {
			/* Pass the exception object on the stack and transfer control to
			 * the runtime throw helper; this call does not return here. */
			x86_push_reg (code, ins->sreg1);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
							  (gpointer)"mono_arch_throw_exception");
			break;
		}
		case OP_RETHROW: {
			x86_push_reg (code, ins->sreg1);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
							  (gpointer)"mono_arch_rethrow_exception");
			break;
		}
		case OP_CALL_HANDLER: 
			/* Call a finally/filter handler block; the target address is
			 * resolved later through the MONO_PATCH_INFO_BB patch. */
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
			x86_call_imm (code, 0);
			break;
		case OP_LABEL:
			/* Record the native offset of this label for later branches. */
			ins->inst_c0 = code - cfg->native_code;
			break;
		case CEE_BR:
			//g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
			//if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
			//break;
			if (ins->flags & MONO_INST_BRLABEL) {
				/* Branch to a label instruction. If the label was already
				 * emitted (inst_c0 set), jump there directly; otherwise record
				 * a patch and emit a short jump when the estimated distance
				 * fits in an imm8 (only with the branch optimization on). */
				if (ins->inst_i0->inst_c0) {
					x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
				} else {
					mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
					if ((cfg->opt & MONO_OPT_BRANCH) &&
					    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
						x86_jump8 (code, 0);
					else 
						x86_jump32 (code, 0);
				}
			} else {
				/* Branch to a basic block, same forward/backward handling. */
				if (ins->inst_target_bb->native_offset) {
					x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
				} else {
					mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
					if ((cfg->opt & MONO_OPT_BRANCH) &&
					    x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
						x86_jump8 (code, 0);
					else 
						x86_jump32 (code, 0);
				} 
			}
			break;
		case OP_BR_REG:
			/* Computed jump through a register. */
			x86_jump_reg (code, ins->sreg1);
			break;
		/* Compare-and-set opcodes: materialize the condition computed by a
		 * preceding compare into dreg as 0/1 (setcc on the 8-bit register,
		 * then zero-extend to 32 bits). The last argument of x86_set_reg
		 * selects the signed (TRUE) or unsigned (FALSE) condition code. */
		case OP_CEQ:
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_CLT:
			x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_CLT_UN:
			/* unsigned variant: LT with is_signed == FALSE maps to "below" */
			x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_CGT:
			x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_CGT_UN:
			x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_CNE:
			x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		/* Conditional-exception opcodes: branch to code that throws the
		 * exception named by inst_p1 when the condition (looked up in
		 * branch_cc_table by opcode offset) holds. */
		case OP_COND_EXC_EQ:
		case OP_COND_EXC_NE_UN:
		case OP_COND_EXC_LT:
		case OP_COND_EXC_LT_UN:
		case OP_COND_EXC_GT:
		case OP_COND_EXC_GT_UN:
		case OP_COND_EXC_GE:
		case OP_COND_EXC_GE_UN:
		case OP_COND_EXC_LE:
		case OP_COND_EXC_LE_UN:
		case OP_COND_EXC_OV:
		case OP_COND_EXC_NO:
		case OP_COND_EXC_C:
		case OP_COND_EXC_NC:
			EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
						    (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
			break;
		/* Conditional branches: same branch_cc_table lookup; the second
		 * EMIT_COND_BRANCH argument is the signed flag. */
		case CEE_BEQ:
		case CEE_BNE_UN:
		case CEE_BLT:
		case CEE_BLT_UN:
		case CEE_BGT:
		case CEE_BGT_UN:
		case CEE_BGE:
		case CEE_BGE_UN:
		case CEE_BLE:
		case CEE_BLE_UN:
			EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
			break;
3465
		/* floating point opcodes */
		case OP_R8CONST: {
			/* Load a double constant onto the x87 stack. +0.0 and 1.0 have
			 * dedicated one-byte instructions; the signbit check keeps -0.0
			 * out of the fldz fast path. */
			double d = *(double *)ins->inst_p0;

			if ((d == 0.0) && (mono_signbit (d) == 0)) {
				x86_fldz (code);
			} else if (d == 1.0) {
				x86_fld1 (code);
			} else {
				if (cfg->compile_aot) {
					/* AOT: no absolute data address available, so push the
					 * raw 64-bit image on the stack and fld from there. */
					guint32 *val = (guint32*)&d;
					x86_push_imm (code, val [1]);
					x86_push_imm (code, val [0]);
					x86_fld_membase (code, X86_ESP, 0, TRUE);
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
				}
				else {
					/* JIT: load from a patched absolute address. */
					mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
					x86_fld (code, NULL, TRUE);
				}
			}
			break;
		}
		case OP_R4CONST: {
			/* Same as OP_R8CONST for a 32-bit float constant. */
			float f = *(float *)ins->inst_p0;

			if ((f == 0.0) && (mono_signbit (f) == 0)) {
				x86_fldz (code);
			} else if (f == 1.0) {
				x86_fld1 (code);
			} else {
				if (cfg->compile_aot) {
					guint32 val = *(guint32*)&f;
					x86_push_imm (code, val);
					x86_fld_membase (code, X86_ESP, 0, FALSE);
					x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
				}
				else {
					mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
					x86_fld (code, NULL, FALSE);
				}
			}
			break;
		}
		/* x87 load/store: the boolean arguments select double (TRUE) vs
		 * float (FALSE) and, for stores, whether to pop the FP stack. */
		case OP_STORER8_MEMBASE_REG:
			x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
			break;
		case OP_LOADR8_SPILL_MEMBASE:
			/* Reload a spilled double below the current top of stack:
			 * load it, then swap it with ST(1). */
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			x86_fxch (code, 1);
			break;
		case OP_LOADR8_MEMBASE:
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_STORER4_MEMBASE_REG:
			x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
			break;
		case OP_LOADR4_MEMBASE:
			x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
			break;
		case CEE_CONV_R4: /* FIXME: change precision */
		case CEE_CONV_R8:
			/* int -> float: push the integer and fild it from memory
			 * (there is no reg-to-FP move on x87). */
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, FALSE);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			break;
		case OP_X86_FP_LOAD_I8:
			x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_X86_FP_LOAD_I4:
			x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
			break;
		/* float -> int conversions; emit_float_to_int handles the small
		 * sizes (arguments: destination reg, size in bytes, is_signed). */
		case OP_FCONV_TO_I1:
			code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
			break;
		case OP_FCONV_TO_U1:
			code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
			break;
		case OP_FCONV_TO_I2:
			code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
			break;
		case OP_FCONV_TO_U2:
			code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
			break;
		case OP_FCONV_TO_I4:
		case OP_FCONV_TO_I:
			code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
			break;
		case OP_FCONV_TO_I8:
			/* 64-bit conversion done inline: temporarily set the FPU
			 * rounding control to truncation, as required by CIL conv.i8. */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
			/* save the current control word at [esp+0] */
			x86_fnstcw_membase(code, X86_ESP, 0);
			/* dreg is used as a scratch register here: set the RC bits
			 * (0xc00 = round toward zero) in a copy at [esp+2] */
			x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
			x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
			x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
			x86_fldcw_membase (code, X86_ESP, 2);
			/* store the 64-bit result and pop it into dreg (low word) and
			 * ins->unused (high word) */
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
			x86_pop_reg (code, ins->dreg);
			x86_pop_reg (code, ins->unused);
			/* restore the original control word */
			x86_fldcw_membase (code, X86_ESP, 0);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			break;
		case OP_LCONV_TO_R_UN: { 
			/* unsigned long -> double. fild treats the 64-bit value as
			 * signed, so when the top bit is set the result is off by 2^64;
			 * mn is an 80-bit extended-precision constant (exponent 0x403f,
			 * mantissa 0x8000000000000000, i.e. 2^64) added as correction. */
			static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
			guint8 *br;

			/* load 64bit integer to FP stack */
			x86_push_imm (code, 0);
			x86_push_reg (code, ins->sreg2);
			x86_push_reg (code, ins->sreg1);
			x86_fild_membase (code, X86_ESP, 0, TRUE);
			/* store as 80bit FP value */
			x86_fst80_membase (code, X86_ESP, 0);
			
			/* test if lreg is negative (as a signed value); skip the
			 * correction when it is not */
			x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
			br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
	
			/* add correction constant mn */
			x86_fld80_mem (code, mn);
			x86_fld80_membase (code, X86_ESP, 0);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			x86_fst80_membase (code, X86_ESP, 0);

			x86_patch (br, code);

			/* reload the (possibly corrected) value and free the 12-byte
			 * scratch area */
			x86_fld80_membase (code, X86_ESP, 0);
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);

			break;
		}
		case OP_LCONV_TO_OVF_I: {
			/* Checked long -> int conversion: throw OverflowException unless
			 * the 64-bit value (sreg2:sreg1 = high:low) fits in a signed
			 * 32-bit integer. */
			guint8 *br [3], *label [1];

			/* 
			 * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
			 */
			x86_test_reg_reg (code, ins->sreg1, ins->sreg1);

			/* If the low word top bit is set, see if we are negative */
			br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
			/* We are not negative (no top bit set), check for our top word to be zero */
			x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
			br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
			label [0] = code;

			/* throw exception */
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
			x86_jump32 (code, 0);
	
			x86_patch (br [0], code);
			/* our top bit is set, check that top word is 0xffffffff */
			x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
		
			x86_patch (br [1], code);
			/* nope, emit exception */
			br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
			x86_patch (br [2], label [0]);

			/* in range: the result is simply the low word */
			if (ins->dreg != ins->sreg1)
				x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
			break;
		}
		/* x87 arithmetic: x86_fp_op_reg (op, 1, TRUE) applies the operation
		 * between ST(0) and ST(1) and pops the stack. */
		case OP_FADD:
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;
		case OP_FSUB:
			x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
			break;		
		case OP_FMUL:
			x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
			break;		
		case OP_FDIV:
			x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
			break;		
		case OP_FNEG:
			x86_fchs (code);
			break;		
		case OP_SIN:
			x86_fsin (code);
			/* The fldz/faddp pair after the transcendental op adds 0.0 to
			 * the result — presumably to normalize/round it; TODO confirm
			 * the exact motivation. */
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_COS:
			x86_fcos (code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_ABS:
			x86_fabs (code);
			break;		
		case OP_TAN: {
			/* 
			 * it really doesn't make sense to inline all this code,
			 * it's here just to show that things may not be as simple 
			 * as they appear.
			 */
			guchar *check_pos, *end_tan, *pop_jump;
			/* EAX is clobbered by fnstsw, so preserve it */
			x86_push_reg (code, X86_EAX);
			x86_fptan (code);
			x86_fnstsw (code);
			/* C2 set means the operand was out of fptan's range and needs
			 * argument reduction */
			x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
			check_pos = code;
			x86_branch8 (code, X86_CC_NE, 0, FALSE);
			x86_fstp (code, 0); /* pop the 1.0 */
			end_tan = code;
			x86_jump8 (code, 0);
			/* out of range: reduce the argument with fprem1 until C2 clears,
			 * then retry fptan */
			x86_fldpi (code);
			x86_fp_op (code, X86_FADD, 0);
			x86_fxch (code, 1);
			x86_fprem1 (code);
			x86_fstsw (code);
			x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
			pop_jump = code;
			x86_branch8 (code, X86_CC_NE, 0, FALSE);
			x86_fstp (code, 1);
			x86_fptan (code);
			x86_patch (pop_jump, code);
			x86_fstp (code, 0); /* pop the 1.0 */
			x86_patch (check_pos, code);
			x86_patch (end_tan, code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			x86_pop_reg (code, X86_EAX);
			break;
		}
		case OP_ATAN:
			/* atan(x) computed as fpatan(x, 1) */
			x86_fld1 (code);
			x86_fpatan (code);
			x86_fldz (code);
			x86_fp_op_reg (code, X86_FADD, 1, TRUE);
			break;		
		case OP_SQRT:
			x86_fsqrt (code);
			break;		
		case OP_X86_FPOP:
			/* discard the top of the FP stack */
			x86_fstp (code, 0);
			break;		
		case OP_FREM: {
			guint8 *l1, *l2;

			/* EAX is clobbered by fnstsw below */
			x86_push_reg (code, X86_EAX);
			/* we need to exchange ST(0) with ST(1) */
			x86_fxch (code, 1);

			/* this requires a loop, because fprem sometimes 
			 * returns a partial remainder (indicated by the C2 flag) */
			l1 = code;
			/* looks like MS is using fprem instead of the IEEE compatible fprem1 */
			/* x86_fprem1 (code); */
			x86_fprem (code);
			x86_fnstsw (code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
			/* l2 points past the 2-byte branch instruction so the backward
			 * displacement to l1 is computed correctly */
			l2 = code + 2;
			x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);

			/* pop result */
			x86_fstp (code, 1);

			x86_pop_reg (code, X86_EAX);
			break;
		}
		case OP_FCOMPARE:
			/* With fcomip available (MONO_OPT_FCMOV), compare directly into
			 * EFLAGS and pop both operands; a following branch/set opcode
			 * consumes the flags. */
			if (cfg->opt & MONO_OPT_FCMOV) {
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				break;
			}
			/* Legacy path: fcom + fnstsw leaves the condition codes in AX. */
			/* this overwrites EAX */
			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			break;
		case OP_FCEQ:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				guchar *unordered_check;
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				/* fcomip sets PF on an unordered compare (NaN operand):
				 * leave dreg as 0 in that case */
				unordered_check = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
				x86_patch (unordered_check, code);
				break;
			}
			/* Legacy path: the fnstsw-based compare clobbers EAX, so save it
			 * unless it is also the destination. 0x4000 is the C3 condition
			 * code bit, which is set when the operands compare equal. */
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FCLT:
		case OP_FCLT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				if (ins->opcode == OP_FCLT_UN) {
					/* clt.un must yield 1 on an unordered compare (NaN):
					 * PF set -> force the result to 1 */
					guchar *unordered_check = code;
					guchar *jump_to_end;
					x86_branch8 (code, X86_CC_P, 0, FALSE);
					x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
					jump_to_end = code;
					x86_jump8 (code, 0);
					x86_patch (unordered_check, code);
					x86_inc_reg (code, ins->dreg);
					x86_patch (jump_to_end, code);
				} else {
					x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
				}
				break;
			}
			/* Legacy path: compare via fnstsw (clobbers EAX, save it unless
			 * it is the destination) and test the condition-code bits. */
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			if (ins->opcode == OP_FCLT_UN) {
				/* for the unordered variant, also accept the all-bits-set
				 * (unordered) condition-code pattern as "less than" */
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);

				x86_patch (end_jump, code);
			}
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
		case OP_FCGT:
		case OP_FCGT_UN:
			if (cfg->opt & MONO_OPT_FCMOV) {
				/* zeroing the register at the start results in 
				 * shorter and faster code (we can also remove the widening op)
				 */
				guchar *unordered_check;
				x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
				x86_fcomip (code, 1);
				x86_fstp (code, 0);
				if (ins->opcode == OP_FCGT) {
					/* ordered cgt must yield 0 on NaN: skip the setcc when
					 * PF (unordered) is set */
					unordered_check = code;
					x86_branch8 (code, X86_CC_P, 0, FALSE);
					x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
					x86_patch (unordered_check, code);
				} else {
					x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
				}
				break;
			}
			/* Legacy path: fnstsw-based compare (clobbers EAX). X86_FP_C0 is
			 * the condition-code pattern tested for the "greater" result. */
			if (ins->dreg != X86_EAX) 
				x86_push_reg (code, X86_EAX);

			EMIT_FPCOMPARE(code);
			x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
			x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
			if (ins->opcode == OP_FCGT_UN) {
				/* unordered variant: also accept the all-bits-set
				 * (unordered) condition-code pattern */
				guchar *is_not_zero_check, *end_jump;
				is_not_zero_check = code;
				x86_branch8 (code, X86_CC_NZ, 0, TRUE);
				end_jump = code;
				x86_jump8 (code, 0);
				x86_patch (is_not_zero_check, code);
				x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
	
				x86_patch (end_jump, code);
			}
			x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
			x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);

			if (ins->dreg != X86_EAX) 
				x86_pop_reg (code, X86_EAX);
			break;
3854                 case OP_FBEQ:
3855                         if (cfg->opt & MONO_OPT_FCMOV) {
3856                                 guchar *jump = code;
3857                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3858                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3859                                 x86_patch (jump, code);
3860                                 break;
3861                         }
3862                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3863                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3864                         break;
3865                 case OP_FBNE_UN:
3866                         /* Branch if C013 != 100 */
3867                         if (cfg->opt & MONO_OPT_FCMOV) {
3868                                 /* branch if !ZF or (PF|CF) */
3869                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3870                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3871                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3872                                 break;
3873                         }
3874                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3875                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3876                         break;
3877                 case OP_FBLT:
3878                         if (cfg->opt & MONO_OPT_FCMOV) {
3879                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3880                                 break;
3881                         }
3882                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3883                         break;
3884                 case OP_FBLT_UN:
3885                         if (cfg->opt & MONO_OPT_FCMOV) {
3886                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3887                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3888                                 break;
3889                         }
3890                         if (ins->opcode == OP_FBLT_UN) {
3891                                 guchar *is_not_zero_check, *end_jump;
3892                                 is_not_zero_check = code;
3893                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3894                                 end_jump = code;
3895                                 x86_jump8 (code, 0);
3896                                 x86_patch (is_not_zero_check, code);
3897                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3898
3899                                 x86_patch (end_jump, code);
3900                         }
3901                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3902                         break;
3903                 case OP_FBGT:
3904                 case OP_FBGT_UN:
3905                         if (cfg->opt & MONO_OPT_FCMOV) {
3906                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3907                                 break;
3908                         }
3909                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3910                         if (ins->opcode == OP_FBGT_UN) {
3911                                 guchar *is_not_zero_check, *end_jump;
3912                                 is_not_zero_check = code;
3913                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3914                                 end_jump = code;
3915                                 x86_jump8 (code, 0);
3916                                 x86_patch (is_not_zero_check, code);
3917                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3918
3919                                 x86_patch (end_jump, code);
3920                         }
3921                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3922                         break;
3923                 case OP_FBGE:
3924                         /* Branch if C013 == 100 or 001 */
3925                         if (cfg->opt & MONO_OPT_FCMOV) {
3926                                 guchar *br1;
3927
3928                                 /* skip branch if C1=1 */
3929                                 br1 = code;
3930                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3931                                 /* branch if (C0 | C3) = 1 */
3932                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3933                                 x86_patch (br1, code);
3934                                 break;
3935                         }
3936                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3937                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3938                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3939                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3940                         break;
3941                 case OP_FBGE_UN:
3942                         /* Branch if C013 == 000 */
3943                         if (cfg->opt & MONO_OPT_FCMOV) {
3944                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3945                                 break;
3946                         }
3947                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3948                         break;
3949                 case OP_FBLE:
3950                         /* Branch if C013=000 or 100 */
3951                         if (cfg->opt & MONO_OPT_FCMOV) {
3952                                 guchar *br1;
3953
3954                                 /* skip branch if C1=1 */
3955                                 br1 = code;
3956                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3957                                 /* branch if C0=0 */
3958                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3959                                 x86_patch (br1, code);
3960                                 break;
3961                         }
3962                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3963                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3964                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3965                         break;
3966                 case OP_FBLE_UN:
3967                         /* Branch if C013 != 001 */
3968                         if (cfg->opt & MONO_OPT_FCMOV) {
3969                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3970                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3971                                 break;
3972                         }
3973                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3974                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3975                         break;
3976                 case CEE_CKFINITE: {
3977                         x86_push_reg (code, X86_EAX);
3978                         x86_fxam (code);
3979                         x86_fnstsw (code);
3980                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3981                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3982                         x86_pop_reg (code, X86_EAX);
3983                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3984                         break;
3985                 }
3986                 case OP_TLS_GET: {
3987                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
3988                         break;
3989                 }
3990                 case OP_ATOMIC_ADD_I4: {
3991                         int dreg = ins->dreg;
3992
3993                         if (dreg == ins->inst_basereg) {
3994                                 x86_push_reg (code, ins->sreg2);
3995                                 dreg = ins->sreg2;
3996                         } 
3997                         
3998                         if (dreg != ins->sreg2)
3999                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
4000
4001                         x86_prefix (code, X86_LOCK_PREFIX);
4002                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4003
4004                         if (dreg != ins->dreg) {
4005                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4006                                 x86_pop_reg (code, dreg);
4007                         }
4008
4009                         break;
4010                 }
4011                 case OP_ATOMIC_ADD_NEW_I4: {
4012                         int dreg = ins->dreg;
4013
4014                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
4015                         if (ins->sreg2 == dreg) {
4016                                 if (dreg == X86_EBX) {
4017                                         dreg = X86_EDI;
4018                                         if (ins->inst_basereg == X86_EDI)
4019                                                 dreg = X86_ESI;
4020                                 } else {
4021                                         dreg = X86_EBX;
4022                                         if (ins->inst_basereg == X86_EBX)
4023                                                 dreg = X86_EDI;
4024                                 }
4025                         } else if (ins->inst_basereg == dreg) {
4026                                 if (dreg == X86_EBX) {
4027                                         dreg = X86_EDI;
4028                                         if (ins->sreg2 == X86_EDI)
4029                                                 dreg = X86_ESI;
4030                                 } else {
4031                                         dreg = X86_EBX;
4032                                         if (ins->sreg2 == X86_EBX)
4033                                                 dreg = X86_EDI;
4034                                 }
4035                         }
4036
4037                         if (dreg != ins->dreg) {
4038                                 x86_push_reg (code, dreg);
4039                         }
4040
4041                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
4042                         x86_prefix (code, X86_LOCK_PREFIX);
4043                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
4044                         /* dreg contains the old value, add with sreg2 value */
4045                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
4046                         
4047                         if (ins->dreg != dreg) {
4048                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
4049                                 x86_pop_reg (code, dreg);
4050                         }
4051
4052                         break;
4053                 }
4054                 case OP_ATOMIC_EXCHANGE_I4: {
4055                         guchar *br[2];
4056                         int sreg2 = ins->sreg2;
4057                         int breg = ins->inst_basereg;
4058
4059                         /* cmpxchg uses eax as comperand, need to make sure we can use it
4060                          * hack to overcome limits in x86 reg allocator 
4061                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
4062                          */
4063                         if (ins->dreg != X86_EAX)
4064                                 x86_push_reg (code, X86_EAX);
4065                         
4066                         /* We need the EAX reg for the cmpxchg */
4067                         if (ins->sreg2 == X86_EAX) {
4068                                 x86_push_reg (code, X86_EDX);
4069                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
4070                                 sreg2 = X86_EDX;
4071                         }
4072
4073                         if (breg == X86_EAX) {
4074                                 x86_push_reg (code, X86_ESI);
4075                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
4076                                 breg = X86_ESI;
4077                         }
4078
4079                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
4080
4081                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
4082                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
4083                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
4084                         x86_patch (br [1], br [0]);
4085
4086                         if (breg != ins->inst_basereg)
4087                                 x86_pop_reg (code, X86_ESI);
4088
4089                         if (ins->dreg != X86_EAX) {
4090                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
4091                                 x86_pop_reg (code, X86_EAX);
4092                         }
4093
4094                         if (ins->sreg2 != sreg2)
4095                                 x86_pop_reg (code, X86_EDX);
4096
4097                         break;
4098                 }
4099                 default:
4100                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
4101                         g_assert_not_reached ();
4102                 }
4103
4104                 if ((code - cfg->native_code - offset) > max_len) {
4105                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4106                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4107                         g_assert_not_reached ();
4108                 }
4109                
4110                 cpos += max_len;
4111
4112                 last_ins = ins;
4113                 last_offset = offset;
4114                 
4115                 ins = ins->next;
4116         }
4117
4118         cfg->code_len = code - cfg->native_code;
4119 }
4120
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Arch-specific hook for registering low-level helper calls.
 * The x86 backend has nothing to register, so this is intentionally empty.
 */
void
mono_arch_register_lowlevel_calls (void)
{
}
4125
4126 void
4127 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4128 {
4129         MonoJumpInfo *patch_info;
4130         gboolean compile_aot = !run_cctors;
4131
4132         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4133                 unsigned char *ip = patch_info->ip.i + code;
4134                 const unsigned char *target;
4135
4136                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4137
4138                 if (compile_aot) {
4139                         switch (patch_info->type) {
4140                         case MONO_PATCH_INFO_BB:
4141                         case MONO_PATCH_INFO_LABEL:
4142                                 break;
4143                         default:
4144                                 /* No need to patch these */
4145                                 continue;
4146                         }
4147                 }
4148
4149                 switch (patch_info->type) {
4150                 case MONO_PATCH_INFO_IP:
4151                         *((gconstpointer *)(ip)) = target;
4152                         break;
4153                 case MONO_PATCH_INFO_CLASS_INIT: {
4154                         guint8 *code = ip;
4155                         /* Might already been changed to a nop */
4156                         x86_call_code (code, 0);
4157                         x86_patch (ip, target);
4158                         break;
4159                 }
4160                 case MONO_PATCH_INFO_ABS:
4161                 case MONO_PATCH_INFO_METHOD:
4162                 case MONO_PATCH_INFO_METHOD_JUMP:
4163                 case MONO_PATCH_INFO_INTERNAL_METHOD:
4164                 case MONO_PATCH_INFO_BB:
4165                 case MONO_PATCH_INFO_LABEL:
4166                         x86_patch (ip, target);
4167                         break;
4168                 case MONO_PATCH_INFO_NONE:
4169                         break;
4170                 default: {
4171                         guint32 offset = mono_arch_get_patch_offset (ip);
4172                         *((gconstpointer *)(ip + offset)) = target;
4173                         break;
4174                 }
4175                 }
4176         }
4177 }
4178
/*
 * mono_arch_emit_prolog:
 *
 *   Emit the method prolog for CFG: set up the EBP frame, optionally attach
 * the thread to the JIT (native-to-managed wrappers), save the LMF or the
 * used callee-saved registers, allocate the stack frame, precompute
 * basic-block offsets for short branches, and move register-allocated
 * arguments from the stack into their registers.
 *
 *   Returns the code pointer past the emitted prolog; cfg->native_code and
 * cfg->code_len are updated.
 */
guint8 *
mono_arch_emit_prolog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoBasicBlock *bb;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int alloc_size, pos, max_offset, i;
	guint8 *code;

	/* Initial native code buffer: 4x the IL size, at least 256 bytes */
	cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
	code = cfg->native_code = g_malloc (cfg->code_size);

	/* Standard frame: push ebp; mov ebp, esp */
	x86_push_reg (code, X86_EBP);
	x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);

	/* stack_offset is negative; alloc_size is the frame size in bytes */
	alloc_size = - cfg->stack_offset;
	pos = 0;

	if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
		/* Might need to attach the thread to the JIT */
		if (lmf_tls_offset != -1) {
			guint8 *buf;

			/* If the LMF TLS slot is non-NULL the thread is already attached */
			code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
			x86_test_reg_reg (code, X86_EAX, X86_EAX);
			buf = code;
			x86_branch8 (code, X86_CC_NE, 0, 0);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			/* Patch the skip-attach branch to land here */
			x86_patch (buf, code);
		}
		else {
			/* No fast TLS path: attach unconditionally */
			g_assert (!cfg->compile_aot);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
		}
	}

	if (method->save_lmf) {
		/*
		 * Build a MonoLMF on the stack. The push order below fixes the
		 * EBP-relative offsets the epilog reads back (IP at -4, EBP -8,
		 * ESI -12, EDI -16, EBX -20, method -24, lmf -28,
		 * previous_lmf -32).
		 */
		pos += sizeof (MonoLMF);

		/* save the current IP */
		mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
		x86_push_imm_template (code);

		/* save all caller saved regs */
		x86_push_reg (code, X86_EBP);
		x86_push_reg (code, X86_ESI);
		x86_push_reg (code, X86_EDI);
		x86_push_reg (code, X86_EBX);

		/* save method info */
		x86_push_imm (code, method);

		/* get the address of lmf for the current thread */
		/* 
		 * This is performance critical so we try to use some tricks to make
		 * it fast.
		 */
		if (lmf_tls_offset != -1) {
			/* Load lmf quicky using the GS register */
			code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
		}
		else {
			if (cfg->compile_aot) {
				/* The GOT var does not exist yet */
				/* call/pop to materialize EIP, then add the GOT offset
				 * (patched in later) and call through the GOT slot */
				x86_call_imm (code, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
				x86_pop_reg (code, X86_EAX);
				x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
				x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
			}
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
		}

		/* push lmf */
		x86_push_reg (code, X86_EAX); 
		/* push *lfm (previous_lmf) */
		x86_push_membase (code, X86_EAX, 0);
		/* *(lmf) = ESP */
		x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
	} else {

		/* No LMF: only save the callee-saved registers actually used */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_push_reg (code, X86_EBX);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_push_reg (code, X86_EDI);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_push_reg (code, X86_ESI);
			pos += 4;
		}
	}

	/* The pushes above already consumed 'pos' bytes of the frame */
	alloc_size -= pos;

	if (alloc_size) {
		/* See mono_emit_stack_alloc */
#ifdef PLATFORM_WIN32
		/* Touch each 4K page so the OS guard page mechanism can grow
		 * the stack (required on Windows) */
		guint32 remaining_size = alloc_size;
		while (remaining_size >= 0x1000) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
			x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
			remaining_size -= 0x1000;
		}
		if (remaining_size)
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
#else
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
#endif
	}

	/* compute max_offset in order to use short forward jumps */
	max_offset = 0;
	if (cfg->opt & MONO_OPT_BRANCH) {
		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
			MonoInst *ins = bb->code;
			bb->max_offset = max_offset;

			if (cfg->prof_options & MONO_PROFILE_COVERAGE)
				max_offset += 6;
			/* max alignment for loops */
			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
				max_offset += LOOP_ALIGNMENT;

			while (ins) {
				if (ins->opcode == OP_LABEL)
					ins->inst_c1 = max_offset;
				
				/* Conservative upper bound on each instruction's length */
				max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
				ins = ins->next;
			}
		}
	}

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);

	/* load arguments allocated to register from the stack */
	sig = mono_method_signature (method);
	pos = 0;

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = cfg->varinfo [pos];
		if (inst->opcode == OP_REGVAR) {
			x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
			if (cfg->verbose_level > 2)
				g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
		}
		pos++;
	}

	cfg->code_len = code - cfg->native_code;

	return code;
}
4353
/*
 * mono_arch_emit_epilog:
 *
 *   Emit the method epilog for CFG: unwind the LMF or restore the used
 * callee-saved registers, load value-type return values into their
 * registers/FP stack, tear down the EBP frame and return (popping the
 * stdcall argument area when required).
 */
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;
	
	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	/* Grow the native code buffer until the epilog is guaranteed to fit */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with CEE_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;

		/* Find a spare register */
		/* For 64 bit returns EAX:EDX are both live, so EDX cannot be
		 * used as scratch; fall back to EDI (and mark it used so it is
		 * restored below) */
		switch (sig->ret->type) {
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
			prev_lmf_reg = X86_EDI;
			cfg->used_int_regs |= (1 << X86_EDI);
			break;
		default:
			prev_lmf_reg = X86_EDX;
			break;
		}

		/*
		 * The EBP-relative offsets below mirror the LMF push order in
		 * mono_arch_emit_prolog (IP -4, EBP -8, ESI -12, EDI -16,
		 * EBX -20, method -24, lmf -28, previous_lmf -32).
		 */
		/* reg = previous_lmf */
		x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);

		/* ecx = lmf */
		x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);

		/* *(lmf) = previous_lmf */
		x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/* Compute the (negative) offset of the saved-register area,
		 * mirroring the EBX/EDI/ESI pushes in the prolog */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		/* Point ESP at the saved registers, then pop them in reverse
		 * push order */
		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	/* stdcall callees pop their own arguments; vtypes returned on the
	 * stack require popping the hidden return-buffer pointer */
	if (CALLCONV_IS_STDCALL (sig)) {
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	g_free (cinfo);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
4491
/*
 * mono_arch_emit_exceptions:
 *
 *   Emit the out-of-line throw sequences for the MONO_PATCH_INFO_EXC patches
 * collected while emitting the method body. Throw sequences for the same
 * exception class are shared: later throw sites just push their IP offset
 * and jump to the first sequence emitted for that class.
 */
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	/* Cache of up to 16 already-emitted throw sequences, keyed by class */
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	/* Grow the code buffer until the worst case fits (plus 16 bytes slack) */
	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Make the branch at the throw site target this sequence */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse it: push this site's IP offset and jump there */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 got_reg = X86_EAX;
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				if (cfg->compile_aot) {
					size = 5 + 6;
					if (!cfg->got_var)
						size += 32;
					else if (cfg->got_var->opcode == OP_REGOFFSET)
						size += 6;
				}
				else
					size = 5 + 5;

				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					/* Emit a 32-bit push placeholder; it is rewritten with
					 * the real offset below and any slack is nop-padded. */
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				if (cfg->compile_aot) {          
					/*
					 * Since the patches are generated by the back end, there is
					 * no way to generate a got_var at this point.
					 */
					if (!cfg->got_var) {
						/* Materialize the GOT address in %eax by hand:
						 * call/pop to get the current IP, then add the
						 * (patched-in) GOT displacement. */
						x86_call_imm (code, 0);
						mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
						x86_pop_reg (code, X86_EAX);
						x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
					}
					else {
						if (cfg->got_var->opcode == OP_REGOFFSET)
							x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
						else
							got_reg = cfg->got_var->dreg;
					}
				}

				x86_push_imm (code, exc_class->type_token);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				if (cfg->compile_aot)
					x86_call_membase (code, got_reg, 0xf0f0f0f0);
				else
					x86_call_code (code, 0);
				/* Rewrite the placeholder push with the real IP offset */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
4627
4628 void
4629 mono_arch_flush_icache (guint8 *code, gint size)
4630 {
4631         /* not needed */
4632 }
4633
void
mono_arch_flush_register_windows (void)
{
	/* x86 has no register windows (unlike SPARC); nothing to do. */
}
4638
4639 /*
4640  * Support for fast access to the thread-local lmf structure using the GS
4641  * segment register on NPTL + kernel 2.6.x.
4642  */
4643
4644 static gboolean tls_offset_inited = FALSE;
4645
/*
 * mono_arch_setup_jit_tls_data:
 *
 *   Per-thread JIT initialization: cache the TLS offsets/keys used for
 * inline TLS access and, when MONO_ARCH_SIGSEGV_ON_ALTSTACK is defined,
 * record the thread's stack size and install an alternate signal stack so
 * stack-overflow SIGSEGVs can be handled.
 */
void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
	pthread_t self = pthread_self();
	pthread_attr_t attr;
	void *staddr = NULL;
	size_t stsize = 0;
	struct sigaltstack sa;
#endif

	if (!tls_offset_inited) {
		/* MONO_NO_TLS disables inline TLS access entirely */
		if (!getenv ("MONO_NO_TLS")) {
#ifdef PLATFORM_WIN32
			/* 
			 * We need to init this multiple times, since when we are first called, the key might not
			 * be initialized yet.
			 */
			appdomain_tls_offset = mono_domain_get_tls_key ();
			lmf_tls_offset = mono_get_jit_tls_key ();
			thread_tls_offset = mono_thread_get_tls_key ();

			/* Only 64 tls entries can be accessed using inline code */
			if (appdomain_tls_offset >= 64)
				appdomain_tls_offset = -1;
			if (lmf_tls_offset >= 64)
				lmf_tls_offset = -1;
			if (thread_tls_offset >= 64)
				thread_tls_offset = -1;
#else
			tls_offset_inited = TRUE;
			appdomain_tls_offset = mono_domain_get_tls_offset ();
			lmf_tls_offset = mono_get_lmf_tls_offset ();
			thread_tls_offset = mono_thread_get_tls_offset ();
#endif
		}
	}		

#ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK

	/* Determine stack boundaries */
	if (!mono_running_on_valgrind ()) {
#ifdef HAVE_PTHREAD_GETATTR_NP
		pthread_getattr_np( self, &attr );
#else
#ifdef HAVE_PTHREAD_ATTR_GET_NP
		pthread_attr_get_np( self, &attr );
#elif defined(sun)
		pthread_attr_init( &attr );
		pthread_attr_getstacksize( &attr, &stsize );
#else
#error "Not implemented"
#endif
#endif
#ifndef sun
		pthread_attr_getstack( &attr, &staddr, &stsize );
#endif
	}

	/* 
	 * staddr seems to be wrong for the main thread, so we keep the value in
	 * tls->end_of_stack
	 */
	tls->stack_size = stsize;

	/* Setup an alternate signal stack */
	tls->signal_stack = g_malloc (SIGNAL_STACK_SIZE);
	tls->signal_stack_size = SIGNAL_STACK_SIZE;

	sa.ss_sp = tls->signal_stack;
	sa.ss_size = SIGNAL_STACK_SIZE;
	/* NOTE(review): POSIX expects ss_flags == 0 when installing a stack;
	 * SS_ONSTACK is normally an output flag of sigaltstack(). This works
	 * on Linux, but confirm on other platforms. */
	sa.ss_flags = SS_ONSTACK;
	sigaltstack (&sa, NULL);
#endif
}
4721
4722 void
4723 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4724 {
4725 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4726         struct sigaltstack sa;
4727
4728         sa.ss_sp = tls->signal_stack;
4729         sa.ss_size = SIGNAL_STACK_SIZE;
4730         sa.ss_flags = SS_DISABLE;
4731         sigaltstack  (&sa, NULL);
4732
4733         if (tls->signal_stack)
4734                 g_free (tls->signal_stack);
4735 #endif
4736 }
4737
4738 void
4739 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4740 {
4741
4742         /* add the this argument */
4743         if (this_reg != -1) {
4744                 MonoInst *this;
4745                 MONO_INST_NEW (cfg, this, OP_OUTARG);
4746                 this->type = this_type;
4747                 this->sreg1 = this_reg;
4748                 mono_bblock_add_inst (cfg->cbb, this);
4749         }
4750
4751         if (vt_reg != -1) {
4752                 CallInfo * cinfo = get_call_info (inst->signature, FALSE);
4753                 MonoInst *vtarg;
4754
4755                 if (cinfo->ret.storage == ArgValuetypeInReg) {
4756                         /*
4757                          * The valuetype is in EAX:EDX after the call, needs to be copied to
4758                          * the stack. Save the address here, so the call instruction can
4759                          * access it.
4760                          */
4761                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
4762                         vtarg->inst_destbasereg = X86_ESP;
4763                         vtarg->inst_offset = inst->stack_usage;
4764                         vtarg->sreg1 = vt_reg;
4765                         mono_bblock_add_inst (cfg->cbb, vtarg);
4766                 }
4767                 else {
4768                         MonoInst *vtarg;
4769                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4770                         vtarg->type = STACK_MP;
4771                         vtarg->sreg1 = vt_reg;
4772                         mono_bblock_add_inst (cfg->cbb, vtarg);
4773                 }
4774
4775                 g_free (cinfo);
4776         }
4777 }
4778
4779
4780 MonoInst*
4781 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4782 {
4783         MonoInst *ins = NULL;
4784
4785         if (cmethod->klass == mono_defaults.math_class) {
4786                 if (strcmp (cmethod->name, "Sin") == 0) {
4787                         MONO_INST_NEW (cfg, ins, OP_SIN);
4788                         ins->inst_i0 = args [0];
4789                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4790                         MONO_INST_NEW (cfg, ins, OP_COS);
4791                         ins->inst_i0 = args [0];
4792                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4793                         MONO_INST_NEW (cfg, ins, OP_TAN);
4794                         ins->inst_i0 = args [0];
4795                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4796                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4797                         ins->inst_i0 = args [0];
4798                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4799                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4800                         ins->inst_i0 = args [0];
4801                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4802                         MONO_INST_NEW (cfg, ins, OP_ABS);
4803                         ins->inst_i0 = args [0];
4804                 }
4805 #if 0
4806                 /* OP_FREM is not IEEE compatible */
4807                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4808                         MONO_INST_NEW (cfg, ins, OP_FREM);
4809                         ins->inst_i0 = args [0];
4810                         ins->inst_i1 = args [1];
4811                 }
4812 #endif
4813         } else if(cmethod->klass->image == mono_defaults.corlib &&
4814                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4815                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4816
4817                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4818                         MonoInst *ins_iconst;
4819
4820                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4821                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4822                         ins_iconst->inst_c0 = 1;
4823
4824                         ins->inst_i0 = args [0];
4825                         ins->inst_i1 = ins_iconst;
4826                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4827                         MonoInst *ins_iconst;
4828
4829                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4830                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4831                         ins_iconst->inst_c0 = -1;
4832
4833                         ins->inst_i0 = args [0];
4834                         ins->inst_i1 = ins_iconst;
4835                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4836                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4837
4838                         ins->inst_i0 = args [0];
4839                         ins->inst_i1 = args [1];
4840                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4841                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
4842
4843                         ins->inst_i0 = args [0];
4844                         ins->inst_i1 = args [1];
4845                 }
4846         }
4847
4848         return ins;
4849 }
4850
4851
4852 gboolean
4853 mono_arch_print_tree (MonoInst *tree, int arity)
4854 {
4855         return 0;
4856 }
4857
4858 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4859 {
4860         MonoInst* ins;
4861         
4862         if (appdomain_tls_offset == -1)
4863                 return NULL;
4864
4865         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4866         ins->inst_offset = appdomain_tls_offset;
4867         return ins;
4868 }
4869
4870 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4871 {
4872         MonoInst* ins;
4873
4874         if (thread_tls_offset == -1)
4875                 return NULL;
4876
4877         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4878         ins->inst_offset = thread_tls_offset;
4879         return ins;
4880 }
4881
4882 guint32
4883 mono_arch_get_patch_offset (guint8 *code)
4884 {
4885         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4886                 return 2;
4887         else if ((code [0] == 0xba))
4888                 return 1;
4889         else if ((code [0] == 0x68))
4890                 /* push IMM */
4891                 return 1;
4892         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4893                 /* push <OFFSET>(<REG>) */
4894                 return 2;
4895         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4896                 /* call *<OFFSET>(<REG>) */
4897                 return 2;
4898         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4899                 /* fldl <ADDR> */
4900                 return 2;
4901         else if ((code [0] == 0x58) && (code [1] == 0x05))
4902                 /* pop %eax; add <OFFSET>, %eax */
4903                 return 2;
4904         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4905                 /* pop <REG>; add <OFFSET>, <REG> */
4906                 return 3;
4907         else {
4908                 g_assert_not_reached ();
4909                 return -1;
4910         }
4911 }
4912
/*
 * mono_arch_get_vcall_slot_addr:
 *
 *   Given CODE, the address of the instruction following a call, and REGS,
 * the saved register values, return the address of the memory slot the call
 * went through, or NULL if the call was not an indirect call through a
 * register-relative slot.
 */
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
	guint8 reg = 0;
	gint32 disp = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;
	if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *disp8(<REG>): mod=1, opcode extension=2 */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *disp32(<REG>): mod=2, opcode extension=2 */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* direct call rel32: no memory slot involved */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call: should check the above code can't catch it earlier 
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	return (gpointer*)(((gint32)(regs [reg])) + disp);
}
4951
4952 gpointer* 
4953 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
4954 {
4955         guint8 reg = 0;
4956         gint32 disp = 0;
4957
4958         code -= 7;
4959         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
4960                 reg = x86_modrm_rm (code [1]);
4961                 disp = code [4];
4962
4963                 if (reg == X86_EAX)
4964                         return NULL;
4965                 else
4966                         return (gpointer*)(((gint32)(regs [reg])) + disp);
4967         }
4968
4969         return NULL;
4970 }