Fix my last commit in CEE_DUP.
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #include <mono/metadata/appdomain.h>
16 #include <mono/metadata/debug-helpers.h>
17 #include <mono/metadata/threads.h>
18 #include <mono/metadata/profiler-private.h>
19 #include <mono/utils/mono-math.h>
20
21 #include "trace.h"
22 #include "mini-x86.h"
23 #include "inssel.h"
24 #include "cpu-pentium.h"
25
26 static gint lmf_tls_offset = -1;
27 static gint appdomain_tls_offset = -1;
28 static gint thread_tls_offset = -1;
29
/*
 * CALLCONV_IS_STDCALL:
 * Evaluates to non-zero when CALL_CONV must be handled as stdcall.
 */
#ifndef PLATFORM_WIN32
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#else
/* On windows the default pinvoke calling convention is stdcall as well */
#define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
#endif
36
37 #define SIGNAL_STACK_SIZE (64 * 1024)
38
39 const char*
40 mono_arch_regname (int reg) {
41         switch (reg) {
42         case X86_EAX: return "%eax";
43         case X86_EBX: return "%ebx";
44         case X86_ECX: return "%ecx";
45         case X86_EDX: return "%edx";
46         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
47         case X86_EDI: return "%edi";
48         case X86_ESI: return "%esi";
49         }
50         return "unknown";
51 }
52
53 /*
54  * mono_arch_get_argument_info:
55  * @csig:  a method signature
56  * @param_count: the number of parameters to consider
57  * @arg_info: an array to store the result infos
58  *
59  * Gathers information on parameters such as size, alignment and
60  * padding. arg_info should be large enought to hold param_count + 1 entries. 
61  *
62  * Returns the size of the activation frame.
63  */
64 int
65 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
66 {
67         int k, frame_size = 0;
68         int size, align, pad;
69         int offset = 8;
70
71         if (MONO_TYPE_ISSTRUCT (csig->ret)) { 
72                 frame_size += sizeof (gpointer);
73                 offset += 4;
74         }
75
76         arg_info [0].offset = offset;
77
78         if (csig->hasthis) {
79                 frame_size += sizeof (gpointer);
80                 offset += 4;
81         }
82
83         arg_info [0].size = frame_size;
84
85         for (k = 0; k < param_count; k++) {
86                 
87                 if (csig->pinvoke)
88                         size = mono_type_native_stack_size (csig->params [k], &align);
89                 else
90                         size = mono_type_stack_size (csig->params [k], &align);
91
92                 /* ignore alignment for now */
93                 align = 1;
94
95                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
96                 arg_info [k].pad = pad;
97                 frame_size += size;
98                 arg_info [k + 1].pad = 0;
99                 arg_info [k + 1].size = size;
100                 offset += pad;
101                 arg_info [k + 1].offset = offset;
102                 offset += size;
103         }
104
105         align = MONO_ARCH_FRAME_ALIGNMENT;
106         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
107         arg_info [k].pad = pad;
108
109         return frame_size;
110 }
111
112 static const guchar cpuid_impl [] = {
113         0x55,                           /* push   %ebp */
114         0x89, 0xe5,                     /* mov    %esp,%ebp */
115         0x53,                           /* push   %ebx */
116         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
117         0x0f, 0xa2,                     /* cpuid   */
118         0x50,                           /* push   %eax */
119         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
120         0x89, 0x18,                     /* mov    %ebx,(%eax) */
121         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
122         0x89, 0x08,                     /* mov    %ecx,(%eax) */
123         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
124         0x89, 0x10,                     /* mov    %edx,(%eax) */
125         0x58,                           /* pop    %eax */
126         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
127         0x89, 0x02,                     /* mov    %eax,(%edx) */
128         0x5b,                           /* pop    %ebx */
129         0xc9,                           /* leave   */
130         0xc3,                           /* ret     */
131 };
132
133 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
134
135 static int 
136 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
137 {
138         int have_cpuid = 0;
139         __asm__  __volatile__ (
140                 "pushfl\n"
141                 "popl %%eax\n"
142                 "movl %%eax, %%edx\n"
143                 "xorl $0x200000, %%eax\n"
144                 "pushl %%eax\n"
145                 "popfl\n"
146                 "pushfl\n"
147                 "popl %%eax\n"
148                 "xorl %%edx, %%eax\n"
149                 "andl $0x200000, %%eax\n"
150                 "movl %%eax, %0"
151                 : "=r" (have_cpuid)
152                 :
153                 : "%eax", "%edx"
154         );
155
156         if (have_cpuid) {
157                 CpuidFunc func = (CpuidFunc)cpuid_impl;
158                 func (id, p_eax, p_ebx, p_ecx, p_edx);
159                 /*
160                  * We use this approach because of issues with gcc and pic code, see:
161                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
162                 __asm__ __volatile__ ("cpuid"
163                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
164                         : "a" (id));
165                 */
166                 return 1;
167         }
168         return 0;
169 }
170
171 /*
172  * Initialize the cpu to execute managed code.
173  */
174 void
175 mono_arch_cpu_init (void)
176 {
177         guint16 fpcw;
178
179         /* spec compliance requires running with double precision */
180         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
181         fpcw &= ~X86_FPCW_PRECC_MASK;
182         fpcw |= X86_FPCW_PREC_DOUBLE;
183         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
184         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
185
186 }
187
188 /*
189  * This function returns the optimizations supported on this cpu.
190  */
191 guint32
192 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
193 {
194         int eax, ebx, ecx, edx;
195         guint32 opts = 0;
196         
197         *exclude_mask = 0;
198         /* Feature Flags function, flags returned in EDX. */
199         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
200                 if (edx & (1 << 15)) {
201                         opts |= MONO_OPT_CMOV;
202                         if (edx & 1)
203                                 opts |= MONO_OPT_FCMOV;
204                         else
205                                 *exclude_mask |= MONO_OPT_FCMOV;
206                 } else
207                         *exclude_mask |= MONO_OPT_CMOV;
208         }
209         return opts;
210 }
211
212 /*
213  * Determine whenever the trap whose info is in SIGINFO is caused by
214  * integer overflow.
215  */
216 gboolean
217 mono_arch_is_int_overflow (void *sigctx, void *info)
218 {
219         struct sigcontext *ctx = (struct sigcontext*)sigctx;
220         guint8* ip;
221
222         ip = (guint8*)ctx->SC_EIP;
223
224         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
225                 gint32 reg;
226
227                 /* idiv REG */
228                 switch (x86_modrm_rm (ip [1])) {
229                 case X86_ECX:
230                         reg = ctx->SC_ECX;
231                         break;
232                 case X86_EBX:
233                         reg = ctx->SC_EBX;
234                         break;
235                 default:
236                         g_assert_not_reached ();
237                         reg = -1;
238                 }
239
240                 if (reg == -1)
241                         return TRUE;
242         }
243                         
244         return FALSE;
245 }
246
247 static gboolean
248 is_regsize_var (MonoType *t) {
249         if (t->byref)
250                 return TRUE;
251         switch (mono_type_get_underlying_type (t)->type) {
252         case MONO_TYPE_I4:
253         case MONO_TYPE_U4:
254         case MONO_TYPE_I:
255         case MONO_TYPE_U:
256         case MONO_TYPE_PTR:
257                 return TRUE;
258         case MONO_TYPE_OBJECT:
259         case MONO_TYPE_STRING:
260         case MONO_TYPE_CLASS:
261         case MONO_TYPE_SZARRAY:
262         case MONO_TYPE_ARRAY:
263                 return TRUE;
264         case MONO_TYPE_VALUETYPE:
265                 return FALSE;
266         }
267         return FALSE;
268 }
269
270 GList *
271 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
272 {
273         GList *vars = NULL;
274         int i;
275
276         for (i = 0; i < cfg->num_varinfo; i++) {
277                 MonoInst *ins = cfg->varinfo [i];
278                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
279
280                 /* unused vars */
281                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
282                         continue;
283
284                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
285                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
286                         continue;
287
288                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
289                  * 8bit quantities in caller saved registers on x86 */
290                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
291                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
292                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
293                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
294                         g_assert (i == vmv->idx);
295                         vars = g_list_prepend (vars, vmv);
296                 }
297         }
298
299         vars = mono_varlist_sort (cfg, vars, 0);
300
301         return vars;
302 }
303
304 GList *
305 mono_arch_get_global_int_regs (MonoCompile *cfg)
306 {
307         GList *regs = NULL;
308
309         /* we can use 3 registers for global allocation */
310         regs = g_list_prepend (regs, (gpointer)X86_EBX);
311         regs = g_list_prepend (regs, (gpointer)X86_ESI);
312         regs = g_list_prepend (regs, (gpointer)X86_EDI);
313
314         return regs;
315 }
316
317 /*
318  * mono_arch_regalloc_cost:
319  *
320  *  Return the cost, in number of memory references, of the action of 
321  * allocating the variable VMV into a register during global register
322  * allocation.
323  */
324 guint32
325 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
326 {
327         MonoInst *ins = cfg->varinfo [vmv->idx];
328
329         if (cfg->method->save_lmf)
330                 /* The register is already saved */
331                 return (ins->opcode == OP_ARG) ? 1 : 0;
332         else
333                 /* push+pop+possible load if it is an argument */
334                 return (ins->opcode == OP_ARG) ? 3 : 2;
335 }
336  
337 /*
338  * Set var information according to the calling convention. X86 version.
339  * The locals var stuff should most likely be split in another method.
340  */
341 void
342 mono_arch_allocate_vars (MonoCompile *m)
343 {
344         MonoMethodSignature *sig;
345         MonoMethodHeader *header;
346         MonoInst *inst;
347         int i, offset, size, align, curinst;
348
349         header = mono_method_get_header (m->method);
350
351         sig = m->method->signature;
352
353         offset = 8;
354         curinst = 0;
355         if (MONO_TYPE_ISSTRUCT (sig->ret)) {
356                 m->ret->opcode = OP_REGOFFSET;
357                 m->ret->inst_basereg = X86_EBP;
358                 m->ret->inst_offset = offset;
359                 offset += sizeof (gpointer);
360         } else {
361                 /* FIXME: handle long and FP values */
362                 switch (sig->ret->type) {
363                 case MONO_TYPE_VOID:
364                         break;
365                 default:
366                         m->ret->opcode = OP_REGVAR;
367                         m->ret->inst_c0 = X86_EAX;
368                         break;
369                 }
370         }
371         if (sig->hasthis) {
372                 inst = m->varinfo [curinst];
373                 if (inst->opcode != OP_REGVAR) {
374                         inst->opcode = OP_REGOFFSET;
375                         inst->inst_basereg = X86_EBP;
376                 }
377                 inst->inst_offset = offset;
378                 offset += sizeof (gpointer);
379                 curinst++;
380         }
381
382         if (sig->call_convention == MONO_CALL_VARARG) {
383                 m->sig_cookie = offset;
384                 offset += sizeof (gpointer);
385         }
386
387         for (i = 0; i < sig->param_count; ++i) {
388                 inst = m->varinfo [curinst];
389                 if (inst->opcode != OP_REGVAR) {
390                         inst->opcode = OP_REGOFFSET;
391                         inst->inst_basereg = X86_EBP;
392                 }
393                 inst->inst_offset = offset;
394                 size = mono_type_size (sig->params [i], &align);
395                 size += 4 - 1;
396                 size &= ~(4 - 1);
397                 offset += size;
398                 curinst++;
399         }
400
401         offset = 0;
402
403         /* reserve space to save LMF and caller saved registers */
404
405         if (m->method->save_lmf) {
406                 offset += sizeof (MonoLMF);
407         } else {
408                 if (m->used_int_regs & (1 << X86_EBX)) {
409                         offset += 4;
410                 }
411
412                 if (m->used_int_regs & (1 << X86_EDI)) {
413                         offset += 4;
414                 }
415
416                 if (m->used_int_regs & (1 << X86_ESI)) {
417                         offset += 4;
418                 }
419         }
420
421         for (i = curinst; i < m->num_varinfo; ++i) {
422                 inst = m->varinfo [i];
423
424                 if ((inst->flags & MONO_INST_IS_DEAD) || inst->opcode == OP_REGVAR)
425                         continue;
426
427                 /* inst->unused indicates native sized value types, this is used by the
428                 * pinvoke wrappers when they call functions returning structure */
429                 if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
430                         size = mono_class_native_size (inst->inst_vtype->data.klass, &align);
431                 else
432                         size = mono_type_size (inst->inst_vtype, &align);
433
434                 offset += size;
435                 offset += align - 1;
436                 offset &= ~(align - 1);
437                 inst->opcode = OP_REGOFFSET;
438                 inst->inst_basereg = X86_EBP;
439                 inst->inst_offset = -offset;
440                 //g_print ("allocating local %d to %d\n", i, -offset);
441         }
442         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
443         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
444
445         /* change sign? */
446         m->stack_offset = -offset;
447 }
448
449 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
450  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
451  */
452
453 /* 
454  * take the arguments and generate the arch-specific
455  * instructions to properly call the function in call.
456  * This includes pushing, moving arguments to the right register
457  * etc.
458  * Issue: who does the spilling if needed, and when?
459  */
460 MonoCallInst*
461 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
462         MonoInst *arg, *in;
463         MonoMethodSignature *sig;
464         int i, n, stack_size, type;
465         MonoType *ptype;
466
467         stack_size = 0;
468         /* add the vararg cookie before the non-implicit args */
469         if (call->signature->call_convention == MONO_CALL_VARARG) {
470                 MonoInst *sig_arg;
471                 /* FIXME: Add support for signature tokens to AOT */
472                 cfg->disable_aot = TRUE;
473                 MONO_INST_NEW (cfg, arg, OP_OUTARG);
474                 MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
475                 sig_arg->inst_p0 = call->signature;
476                 arg->inst_left = sig_arg;
477                 arg->type = STACK_PTR;
478                 /* prepend, so they get reversed */
479                 arg->next = call->out_args;
480                 call->out_args = arg;
481                 stack_size += sizeof (gpointer);
482         }
483         sig = call->signature;
484         n = sig->param_count + sig->hasthis;
485
486         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
487                 stack_size += sizeof (gpointer);
488         for (i = 0; i < n; ++i) {
489                 if (is_virtual && i == 0) {
490                         /* the argument will be attached to the call instrucion */
491                         in = call->args [i];
492                         stack_size += 4;
493                 } else {
494                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
495                         in = call->args [i];
496                         arg->cil_code = in->cil_code;
497                         arg->inst_left = in;
498                         arg->type = in->type;
499                         /* prepend, so they get reversed */
500                         arg->next = call->out_args;
501                         call->out_args = arg;
502                         if (i >= sig->hasthis) {
503                                 MonoType *t = sig->params [i - sig->hasthis];
504                                 ptype = mono_type_get_underlying_type (t);
505                                 if (t->byref)
506                                         type = MONO_TYPE_U;
507                                 else
508                                         type = ptype->type;
509                                 /* FIXME: validate arguments... */
510                                 switch (type) {
511                                 case MONO_TYPE_I:
512                                 case MONO_TYPE_U:
513                                 case MONO_TYPE_BOOLEAN:
514                                 case MONO_TYPE_CHAR:
515                                 case MONO_TYPE_I1:
516                                 case MONO_TYPE_U1:
517                                 case MONO_TYPE_I2:
518                                 case MONO_TYPE_U2:
519                                 case MONO_TYPE_I4:
520                                 case MONO_TYPE_U4:
521                                 case MONO_TYPE_STRING:
522                                 case MONO_TYPE_CLASS:
523                                 case MONO_TYPE_OBJECT:
524                                 case MONO_TYPE_PTR:
525                                 case MONO_TYPE_FNPTR:
526                                 case MONO_TYPE_ARRAY:
527                                 case MONO_TYPE_SZARRAY:
528                                         stack_size += 4;
529                                         break;
530                                 case MONO_TYPE_I8:
531                                 case MONO_TYPE_U8:
532                                         stack_size += 8;
533                                         break;
534                                 case MONO_TYPE_R4:
535                                         stack_size += 4;
536                                         arg->opcode = OP_OUTARG_R4;
537                                         break;
538                                 case MONO_TYPE_R8:
539                                         stack_size += 8;
540                                         arg->opcode = OP_OUTARG_R8;
541                                         break;
542                                 case MONO_TYPE_VALUETYPE: {
543                                         int size;
544                                         if (sig->pinvoke) 
545                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
546                                         else 
547                                                 size = mono_type_stack_size (&in->klass->byval_arg, NULL);
548
549                                         stack_size += size;
550                                         arg->opcode = OP_OUTARG_VT;
551                                         arg->klass = in->klass;
552                                         arg->unused = sig->pinvoke;
553                                         arg->inst_imm = size; 
554                                         break;
555                                 }
556                                 case MONO_TYPE_TYPEDBYREF:
557                                         stack_size += sizeof (MonoTypedRef);
558                                         arg->opcode = OP_OUTARG_VT;
559                                         arg->klass = in->klass;
560                                         arg->unused = sig->pinvoke;
561                                         arg->inst_imm = sizeof (MonoTypedRef); 
562                                         break;
563                                 default:
564                                         g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
565                                 }
566                         } else {
567                                 /* the this argument */
568                                 stack_size += 4;
569                         }
570                 }
571         }
572         /* if the function returns a struct, the called method already does a ret $0x4 */
573         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
574                 stack_size -= 4;
575         call->stack_usage = stack_size;
576         /* 
577          * should set more info in call, such as the stack space
578          * used by the args that needs to be added back to esp
579          */
580
581         return call;
582 }
583
584 /*
585  * Allow tracing to work with this interface (with an optional argument)
586  */
587
588 /*
589  * This may be needed on some archs or for debugging support.
590  */
591 void
592 mono_arch_instrument_mem_needs (MonoMethod *method, int *stack, int *code)
593 {
594         /* no stack room needed now (may be needed for FASTCALL-trace support) */
595         *stack = 0;
596         /* split prolog-epilog requirements? */
597         *code = 50; /* max bytes needed: check this number */
598 }
599
600 void*
601 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
602 {
603         guchar *code = p;
604
605         /* if some args are passed in registers, we need to save them here */
606         x86_push_reg (code, X86_EBP);
607         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
608         x86_push_imm (code, cfg->method);
609         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
610         x86_call_code (code, 0);
611         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
612
613         return code;
614 }
615
/*
 * How mono_arch_instrument_epilog () preserves the return value of the
 * traced method around the call to the trace function.
 */
enum {
	SAVE_NONE = 0,		/* nothing to preserve */
	SAVE_STRUCT = 1,	/* struct return, no register is saved */
	SAVE_EAX = 2,		/* value in EAX */
	SAVE_EAX_EDX = 3,	/* value in the EDX:EAX pair */
	SAVE_FP = 4		/* value on the fp stack */
};
623
624 void*
625 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
626 {
627         guchar *code = p;
628         int arg_size = 0, save_mode = SAVE_NONE;
629         MonoMethod *method = cfg->method;
630         
631         switch (mono_type_get_underlying_type (method->signature->ret)->type) {
632         case MONO_TYPE_VOID:
633                 /* special case string .ctor icall */
634                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
635                         save_mode = SAVE_EAX;
636                 else
637                         save_mode = SAVE_NONE;
638                 break;
639         case MONO_TYPE_I8:
640         case MONO_TYPE_U8:
641                 save_mode = SAVE_EAX_EDX;
642                 break;
643         case MONO_TYPE_R4:
644         case MONO_TYPE_R8:
645                 save_mode = SAVE_FP;
646                 break;
647         case MONO_TYPE_VALUETYPE:
648                 save_mode = SAVE_STRUCT;
649                 break;
650         default:
651                 save_mode = SAVE_EAX;
652                 break;
653         }
654
655         switch (save_mode) {
656         case SAVE_EAX_EDX:
657                 x86_push_reg (code, X86_EDX);
658                 x86_push_reg (code, X86_EAX);
659                 if (enable_arguments) {
660                         x86_push_reg (code, X86_EDX);
661                         x86_push_reg (code, X86_EAX);
662                         arg_size = 8;
663                 }
664                 break;
665         case SAVE_EAX:
666                 x86_push_reg (code, X86_EAX);
667                 if (enable_arguments) {
668                         x86_push_reg (code, X86_EAX);
669                         arg_size = 4;
670                 }
671                 break;
672         case SAVE_FP:
673                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
674                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
675                 if (enable_arguments) {
676                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
677                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
678                         arg_size = 8;
679                 }
680                 break;
681         case SAVE_STRUCT:
682                 if (enable_arguments) {
683                         x86_push_membase (code, X86_EBP, 8);
684                         arg_size = 4;
685                 }
686                 break;
687         case SAVE_NONE:
688         default:
689                 break;
690         }
691
692
693         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
694         x86_push_imm (code, method);
695         mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
696         x86_call_code (code, 0);
697         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
698
699         switch (save_mode) {
700         case SAVE_EAX_EDX:
701                 x86_pop_reg (code, X86_EAX);
702                 x86_pop_reg (code, X86_EDX);
703                 break;
704         case SAVE_EAX:
705                 x86_pop_reg (code, X86_EAX);
706                 break;
707         case SAVE_FP:
708                 x86_fld_membase (code, X86_ESP, 0, TRUE);
709                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
710                 break;
711         case SAVE_NONE:
712         default:
713                 break;
714         }
715
716         return code;
717 }
718
/*
 * EMIT_COND_BRANCH:
 * @ins: the branch instruction
 * @cond: the x86 condition code
 * @sign: passed through to the x86_branch* emitters
 *
 * Emits a conditional branch to the label (MONO_INST_BRLABEL set) or to
 * the basic block ins->inst_true_bb. When the offset of the target is
 * already set, the branch is emitted directly; otherwise a patch info
 * is recorded and a placeholder branch is emitted, using the 8 bit form
 * only if MONO_OPT_BRANCH is enabled and the estimated distance fits.
 *
 * The macro uses the variables cfg, code and cpos of the expansion site.
 * It expands to a bare if/else statement, so it must not be used as the
 * body of an unbraced if.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if ((ins)->flags & MONO_INST_BRLABEL) { \
	if ((ins)->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, (ins)->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 ((ins)->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if ((ins)->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, (ins)->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 ((ins)->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}
743
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 * @cond: the x86 condition code
 * @is_signed: passed through to x86_branch32
 * @exc_name: the name of the exception
 *
 * Records a MONO_PATCH_INFO_EXC patch for EXC_NAME at the current code
 * position and emits a 32 bit conditional branch with a zero
 * placeholder target. Uses cfg and code of the expansion site.
 *
 * NOTE(review): the expansion ends in ';', so 'MACRO (...);' yields an
 * extra empty statement and can not be followed directly by 'else'.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,is_signed,exc_name) \
	do { \
		mono_add_patch_info (cfg, code - cfg->native_code, \
				     MONO_PATCH_INFO_EXC, exc_name); \
		x86_branch32 (code, cond, 0, is_signed); \
	} while (0); 
751
/*
 * EMIT_FPCOMPARE:
 * Emits x86_fcompp followed by x86_fnstsw at CODE.
 *
 * NOTE(review): the expansion ends in ';', see the usual do/while (0)
 * caveat about a following 'else'.
 */
#define EMIT_FPCOMPARE(code) \
	do { \
		x86_fcompp (code); \
		x86_fnstsw (code); \
	} while (0); 
756
/*
 * INST_IGNORES_CFLAGS:
 * Evaluates to non-zero when the opcode of INS is one of those known to
 * ignore the condition flags. INS is evaluated up to two times.
 * FIXME: Add more instructions
 */
#define INST_IGNORES_CFLAGS(ins) \
	(((ins)->opcode == CEE_BR) || \
	 ((ins)->opcode == OP_STORE_MEMBASE_IMM))
759
760 static void
761 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
762 {
763         MonoInst *ins, *last_ins = NULL;
764         ins = bb->code;
765
766         while (ins) {
767
768                 switch (ins->opcode) {
769                 case OP_ICONST:
770                         /* reg = 0 -> XOR (reg, reg) */
771                         /* XOR sets cflags on x86, so we cant do it always */
772                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
773                                 ins->opcode = CEE_XOR;
774                                 ins->sreg1 = ins->dreg;
775                                 ins->sreg2 = ins->dreg;
776                         }
777                         break;
778                 case OP_MUL_IMM: 
779                         /* remove unnecessary multiplication with 1 */
780                         if (ins->inst_imm == 1) {
781                                 if (ins->dreg != ins->sreg1) {
782                                         ins->opcode = OP_MOVE;
783                                 } else {
784                                         last_ins->next = ins->next;
785                                         ins = ins->next;
786                                         continue;
787                                 }
788                         }
789                         break;
790                 case OP_COMPARE_IMM:
791                         /* OP_COMPARE_IMM (reg, 0) 
792                          * --> 
793                          * OP_X86_TEST_NULL (reg) 
794                          */
795                         if (!ins->inst_imm)
796                                 ins->opcode = OP_X86_TEST_NULL;
797                         break;
798                 case OP_X86_COMPARE_MEMBASE_IMM:
799                         /* 
800                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
801                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
802                          * -->
803                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
804                          * OP_COMPARE_IMM reg, imm
805                          *
806                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
807                          */
808                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
809                             ins->inst_basereg == last_ins->inst_destbasereg &&
810                             ins->inst_offset == last_ins->inst_offset) {
811                                         ins->opcode = OP_COMPARE_IMM;
812                                         ins->sreg1 = last_ins->sreg1;
813
814                                         /* check if we can remove cmp reg,0 with test null */
815                                         if (!ins->inst_imm)
816                                                 ins->opcode = OP_X86_TEST_NULL;
817                                 }
818
819                         break;
820                 case OP_LOAD_MEMBASE:
821                 case OP_LOADI4_MEMBASE:
822                         /* 
823                          * Note: if reg1 = reg2 the load op is removed
824                          *
825                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
826                          * OP_LOAD_MEMBASE offset(basereg), reg2
827                          * -->
828                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
829                          * OP_MOVE reg1, reg2
830                          */
831                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
832                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
833                             ins->inst_basereg == last_ins->inst_destbasereg &&
834                             ins->inst_offset == last_ins->inst_offset) {
835                                 if (ins->dreg == last_ins->sreg1) {
836                                         last_ins->next = ins->next;                             
837                                         ins = ins->next;                                
838                                         continue;
839                                 } else {
840                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
841                                         ins->opcode = OP_MOVE;
842                                         ins->sreg1 = last_ins->sreg1;
843                                 }
844
845                         /* 
846                          * Note: reg1 must be different from the basereg in the second load
847                          * Note: if reg1 = reg2 is equal then second load is removed
848                          *
849                          * OP_LOAD_MEMBASE offset(basereg), reg1
850                          * OP_LOAD_MEMBASE offset(basereg), reg2
851                          * -->
852                          * OP_LOAD_MEMBASE offset(basereg), reg1
853                          * OP_MOVE reg1, reg2
854                          */
855                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
856                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
857                               ins->inst_basereg != last_ins->dreg &&
858                               ins->inst_basereg == last_ins->inst_basereg &&
859                               ins->inst_offset == last_ins->inst_offset) {
860
861                                 if (ins->dreg == last_ins->dreg) {
862                                         last_ins->next = ins->next;                             
863                                         ins = ins->next;                                
864                                         continue;
865                                 } else {
866                                         ins->opcode = OP_MOVE;
867                                         ins->sreg1 = last_ins->dreg;
868                                 }
869
870                                 //g_assert_not_reached ();
871
872 #if 0
873                         /* 
874                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
875                          * OP_LOAD_MEMBASE offset(basereg), reg
876                          * -->
877                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
878                          * OP_ICONST reg, imm
879                          */
880                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
881                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
882                                    ins->inst_basereg == last_ins->inst_destbasereg &&
883                                    ins->inst_offset == last_ins->inst_offset) {
884                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
885                                 ins->opcode = OP_ICONST;
886                                 ins->inst_c0 = last_ins->inst_imm;
887                                 g_assert_not_reached (); // check this rule
888 #endif
889                         }
890                         break;
891                 case OP_LOADU1_MEMBASE:
892                 case OP_LOADI1_MEMBASE:
893                         /* 
894                          * Note: if reg1 = reg2 the load op is removed
895                          *
896                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
897                          * OP_LOAD_MEMBASE offset(basereg), reg2
898                          * -->
899                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
900                          * OP_MOVE reg1, reg2
901                          */
902                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
903                                         ins->inst_basereg == last_ins->inst_destbasereg &&
904                                         ins->inst_offset == last_ins->inst_offset) {
905                                 if (ins->dreg == last_ins->sreg1) {
906                                         last_ins->next = ins->next;                             
907                                         ins = ins->next;                                
908                                         continue;
909                                 } else {
910                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
911                                         ins->opcode = OP_MOVE;
912                                         ins->sreg1 = last_ins->sreg1;
913                                 }
914                         }
915                         break;
916                 case OP_LOADU2_MEMBASE:
917                 case OP_LOADI2_MEMBASE:
918                         /* 
919                          * Note: if reg1 = reg2 the load op is removed
920                          *
921                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
922                          * OP_LOAD_MEMBASE offset(basereg), reg2
923                          * -->
924                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
925                          * OP_MOVE reg1, reg2
926                          */
927                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
928                                         ins->inst_basereg == last_ins->inst_destbasereg &&
929                                         ins->inst_offset == last_ins->inst_offset) {
930                                 if (ins->dreg == last_ins->sreg1) {
931                                         last_ins->next = ins->next;                             
932                                         ins = ins->next;                                
933                                         continue;
934                                 } else {
935                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
936                                         ins->opcode = OP_MOVE;
937                                         ins->sreg1 = last_ins->sreg1;
938                                 }
939                         }
940                         break;
941                 case CEE_CONV_I4:
942                 case CEE_CONV_U4:
943                 case OP_MOVE:
944                         /*
945                          * Removes:
946                          *
947                          * OP_MOVE reg, reg 
948                          */
949                         if (ins->dreg == ins->sreg1) {
950                                 if (last_ins)
951                                         last_ins->next = ins->next;                             
952                                 ins = ins->next;
953                                 continue;
954                         }
955                         /* 
956                          * Removes:
957                          *
958                          * OP_MOVE sreg, dreg 
959                          * OP_MOVE dreg, sreg
960                          */
961                         if (last_ins && last_ins->opcode == OP_MOVE &&
962                             ins->sreg1 == last_ins->dreg &&
963                             ins->dreg == last_ins->sreg1) {
964                                 last_ins->next = ins->next;                             
965                                 ins = ins->next;                                
966                                 continue;
967                         }
968                         break;
969                 }
970                 last_ins = ins;
971                 ins = ins->next;
972         }
973         bb->last_ins = last_ins;
974 }
975
976 static const int 
977 branch_cc_table [] = {
978         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
979         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
980         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
981 };
982
/*
 * Executes the statement A only when cfg->verbose_level > 1.  A variable
 * named `cfg' must be in scope at the point of use.
 *
 * The macro expands to a complete if/else so that an `else' written after
 * a DEBUG (...) statement cannot bind to the macro's own `if'.
 */
#define DEBUG(a) if (!(cfg->verbose_level > 1)) { } else a
//#define DEBUG(a)
985
986 /*
987  * returns the offset used by spillvar. It allocates a new
988  * spill variable if necessary. 
989  */
990 static int
991 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
992 {
993         MonoSpillInfo **si, *info;
994         int i = 0;
995
996         si = &cfg->spill_info; 
997         
998         while (i <= spillvar) {
999
1000                 if (!*si) {
1001                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1002                         info->next = NULL;
1003                         cfg->stack_offset -= sizeof (gpointer);
1004                         info->offset = cfg->stack_offset;
1005                 }
1006
1007                 if (i == spillvar)
1008                         return (*si)->offset;
1009
1010                 i++;
1011                 si = &(*si)->next;
1012         }
1013
1014         g_assert_not_reached ();
1015         return 0;
1016 }
1017
1018 /*
1019  * returns the offset used by spillvar. It allocates a new
1020  * spill float variable if necessary. 
1021  * (same as mono_spillvar_offset but for float)
1022  */
1023 static int
1024 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1025 {
1026         MonoSpillInfo **si, *info;
1027         int i = 0;
1028
1029         si = &cfg->spill_info_float; 
1030         
1031         while (i <= spillvar) {
1032
1033                 if (!*si) {
1034                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1035                         info->next = NULL;
1036                         cfg->stack_offset -= sizeof (double);
1037                         info->offset = cfg->stack_offset;
1038                 }
1039
1040                 if (i == spillvar)
1041                         return (*si)->offset;
1042
1043                 i++;
1044                 si = &(*si)->next;
1045         }
1046
1047         g_assert_not_reached ();
1048         return 0;
1049 }
1050
1051 /*
1052  * Creates a store for spilled floating point items
1053  */
1054 static MonoInst*
1055 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1056 {
1057         MonoInst *store;
1058         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1059         store->sreg1 = reg;
1060         store->inst_destbasereg = X86_EBP;
1061         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1062
1063         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08x(%%sp)) (from %d)\n", spill, store->inst_offset, reg));
1064         return store;
1065 }
1066
1067 /*
1068  * Creates a load for spilled floating point items 
1069  */
1070 static MonoInst*
1071 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1072 {
1073         MonoInst *load;
1074         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1075         load->dreg = reg;
1076         load->inst_basereg = X86_EBP;
1077         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1078
1079         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08x(%%sp)) (from %d)\n", spill, load->inst_offset, reg));
1080         return load;
1081 }
1082
/*
 * True when R is a hard register number in the range 0..7 for which
 * X86_IS_CALLEE holds.  The range checks come first so X86_IS_CALLEE is
 * never evaluated for -1 or for a virtual register number.
 * Note: R is evaluated up to three times.
 */
#define reg_is_freeable(r) ((r) >= 0 && (r) <= 7 && X86_IS_CALLEE ((r)))
1084
/*
 * Per-register liveness record filled in by the forward pass of
 * mono_arch_local_regalloc.  All positions are instruction indexes within
 * the basic block, counted from 1; 0 means "not set".
 */
typedef struct {
	int born_in;		/* lowest index at which the register was seen as a destination */
	int killed_in;		/* index of the latest instruction that writes it as a real (non base) destination */
	int last_use;		/* index of the latest instruction that references it */
	int prev_use;		/* value last_use had before its most recent update */
	int flags;		/* MONO_X86_* bits: fp spill/load tracking and hard register preferences */
} RegTrack;
1092
1093 static const char*const * ins_spec = pentium_desc;
1094
1095 static void
1096 print_ins (int i, MonoInst *ins)
1097 {
1098         const char *spec = ins_spec [ins->opcode];
1099         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1100         if (spec [MONO_INST_DEST]) {
1101                 if (ins->dreg >= MONO_MAX_IREGS)
1102                         g_print (" R%d <-", ins->dreg);
1103                 else
1104                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1105         }
1106         if (spec [MONO_INST_SRC1]) {
1107                 if (ins->sreg1 >= MONO_MAX_IREGS)
1108                         g_print (" R%d", ins->sreg1);
1109                 else
1110                         g_print (" %s", mono_arch_regname (ins->sreg1));
1111         }
1112         if (spec [MONO_INST_SRC2]) {
1113                 if (ins->sreg2 >= MONO_MAX_IREGS)
1114                         g_print (" R%d", ins->sreg2);
1115                 else
1116                         g_print (" %s", mono_arch_regname (ins->sreg2));
1117         }
1118         if (spec [MONO_INST_CLOB])
1119                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1120         g_print ("\n");
1121 }
1122
1123 static void
1124 print_regtrack (RegTrack *t, int num)
1125 {
1126         int i;
1127         char buf [32];
1128         const char *r;
1129         
1130         for (i = 0; i < num; ++i) {
1131                 if (!t [i].born_in)
1132                         continue;
1133                 if (i >= MONO_MAX_IREGS) {
1134                         g_snprintf (buf, sizeof(buf), "R%d", i);
1135                         r = buf;
1136                 } else
1137                         r = mono_arch_regname (i);
1138                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1139         }
1140 }
1141
1142 typedef struct InstList InstList;
1143
1144 struct InstList {
1145         InstList *prev;
1146         InstList *next;
1147         MonoInst *data;
1148 };
1149
1150 static inline InstList*
1151 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1152 {
1153         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1154         item->data = data;
1155         item->prev = NULL;
1156         item->next = list;
1157         if (list)
1158                 list->prev = item;
1159         return item;
1160 }
1161
1162 /*
1163  * Force the spilling of the variable in the symbolic register 'reg'.
1164  */
1165 static int
1166 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1167 {
1168         MonoInst *load;
1169         int i, sel, spill;
1170         
1171         sel = cfg->rs->iassign [reg];
1172         /*i = cfg->rs->isymbolic [sel];
1173         g_assert (i == reg);*/
1174         i = reg;
1175         spill = ++cfg->spill_count;
1176         cfg->rs->iassign [i] = -spill - 1;
1177         mono_regstate_free_int (cfg->rs, sel);
1178         /* we need to create a spill var and insert a load to sel after the current instruction */
1179         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1180         load->dreg = sel;
1181         load->inst_basereg = X86_EBP;
1182         load->inst_offset = mono_spillvar_offset (cfg, spill);
1183         if (item->prev) {
1184                 while (ins->next != item->prev->data)
1185                         ins = ins->next;
1186         }
1187         load->next = ins->next;
1188         ins->next = load;
1189         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1190         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1191         g_assert (i == sel);
1192
1193         return sel;
1194 }
1195
1196 static int
1197 get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
1198 {
1199         MonoInst *load;
1200         int i, sel, spill;
1201
1202         DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
1203         /* exclude the registers in the current instruction */
1204         if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
1205                 if (ins->sreg1 >= MONO_MAX_IREGS)
1206                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
1207                 else
1208                         regmask &= ~ (1 << ins->sreg1);
1209                 DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
1210         }
1211         if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
1212                 if (ins->sreg2 >= MONO_MAX_IREGS)
1213                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
1214                 else
1215                         regmask &= ~ (1 << ins->sreg2);
1216                 DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
1217         }
1218         if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
1219                 regmask &= ~ (1 << ins->dreg);
1220                 DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
1221         }
1222
1223         DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
1224         g_assert (regmask); /* need at least a register we can free */
1225         sel = -1;
1226         /* we should track prev_use and spill the register that's farther */
1227         for (i = 0; i < MONO_MAX_IREGS; ++i) {
1228                 if (regmask & (1 << i)) {
1229                         sel = i;
1230                         DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
1231                         break;
1232                 }
1233         }
1234         i = cfg->rs->isymbolic [sel];
1235         spill = ++cfg->spill_count;
1236         cfg->rs->iassign [i] = -spill - 1;
1237         mono_regstate_free_int (cfg->rs, sel);
1238         /* we need to create a spill var and insert a load to sel after the current instruction */
1239         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1240         load->dreg = sel;
1241         load->inst_basereg = X86_EBP;
1242         load->inst_offset = mono_spillvar_offset (cfg, spill);
1243         if (item->prev) {
1244                 while (ins->next != item->prev->data)
1245                         ins = ins->next;
1246         }
1247         load->next = ins->next;
1248         ins->next = load;
1249         DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1250         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1251         g_assert (i == sel);
1252         
1253         return sel;
1254 }
1255
1256 static MonoInst*
1257 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1258 {
1259         MonoInst *copy;
1260         MONO_INST_NEW (cfg, copy, OP_MOVE);
1261         copy->dreg = dest;
1262         copy->sreg1 = src;
1263         if (ins) {
1264                 copy->next = ins->next;
1265                 ins->next = copy;
1266         }
1267         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1268         return copy;
1269 }
1270
1271 static MonoInst*
1272 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1273 {
1274         MonoInst *store;
1275         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1276         store->sreg1 = reg;
1277         store->inst_destbasereg = X86_EBP;
1278         store->inst_offset = mono_spillvar_offset (cfg, spill);
1279         if (ins) {
1280                 store->next = ins->next;
1281                 ins->next = store;
1282         }
1283         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1284         return store;
1285 }
1286
1287 static void
1288 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1289 {
1290         MonoInst *prev;
1291         if (item->next) {
1292                 prev = item->next->data;
1293
1294                 while (prev->next != ins)
1295                         prev = prev->next;
1296                 to_insert->next = ins;
1297                 prev->next = to_insert;
1298         } else {
1299                 to_insert->next = ins;
1300         }
1301         /* 
1302          * needed otherwise in the next instruction we can add an ins to the 
1303          * end and that would get past this instruction.
1304          */
1305         item->data = to_insert; 
1306 }
1307
1308
#if  0
/*
 * Disabled (compiled out): generic integer register allocation helper.
 * Returns the hard register assigned to sym_reg, allocating one from
 * allow_mask (spilling another register if none is free) when sym_reg has
 * no assignment yet.
 */
static int
alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
{
	int val = cfg->rs->iassign [sym_reg];
	if (val < 0) {
		int spill = 0;
		if (val < -1) {
			/* the register gets spilled after this inst */
			spill = -val -1;
		}
		val = mono_regstate_alloc_int (cfg->rs, allow_mask);
		if (val < 0)
			val = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
		cfg->rs->iassign [sym_reg] = val;
		/* add option to store before the instruction for src registers */
		if (spill)
			create_spilled_store (cfg, spill, val, sym_reg, ins);
	}
	cfg->rs->isymbolic [val] = sym_reg;
	return val;
}
#endif
1332
/*
 * Bit flags stored in RegTrack.flags (reginfo->flags).  The first three
 * track floating point spills and loads; the remaining ones are hard
 * register preferences consumed by mono_x86_alloc_int_reg.
 */
enum {
	MONO_X86_FP_NEEDS_LOAD_SPILL	= 0x01,
	MONO_X86_FP_NEEDS_SPILL		= 0x02,
	MONO_X86_FP_NEEDS_LOAD		= 0x04,
	MONO_X86_REG_NOT_ECX		= 0x08,	/* any allowed register except ECX */
	MONO_X86_REG_EAX		= 0x10,	/* prefer EAX */
	MONO_X86_REG_EDX		= 0x20,	/* prefer EDX */
	MONO_X86_REG_ECX		= 0x40	/* prefer ECX */
};
1343
1344 static int
1345 mono_x86_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1346 {
1347         int val;
1348         int test_mask = dest_mask;
1349
1350         if (flags & MONO_X86_REG_EAX)
1351                 test_mask &= (1 << X86_EAX);
1352         else if (flags & MONO_X86_REG_EDX)
1353                 test_mask &= (1 << X86_EDX);
1354         else if (flags & MONO_X86_REG_ECX)
1355                 test_mask &= (1 << X86_ECX);
1356         else if (flags & MONO_X86_REG_NOT_ECX)
1357                 test_mask &= ~ (1 << X86_ECX);
1358
1359         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1360         if (val >= 0 && test_mask != dest_mask)
1361                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1362
1363         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1364                 DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but exluding ECX\n", test_mask));
1365                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & (~1 << X86_ECX)));
1366         }
1367
1368         if (val < 0) {
1369                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1370                 if (val < 0)
1371                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1372         }
1373
1374         return val;
1375 }
1376
1377
1378 /*#include "cprop.c"*/
1379
1380 /*
1381  * Local register allocation.
1382  * We first scan the list of instructions and we save the liveness info of
 * each register (when the register is first used, when its value is set etc.).
1384  * We also reverse the list of instructions (in the InstList list) because assigning
1385  * registers backwards allows for more tricks to be used.
1386  */
1387 void
1388 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1389 {
1390         MonoInst *ins;
1391         MonoRegState *rs = cfg->rs;
1392         int i, val, fpcount;
1393         RegTrack *reginfo, *reginfof;
1394         RegTrack *reginfo1, *reginfo2, *reginfod;
1395         InstList *tmp, *reversed = NULL;
1396         const char *spec;
1397         guint32 src1_mask, src2_mask, dest_mask;
1398         GList *fspill_list = NULL;
1399         int fspill = 0;
1400
1401         if (!bb->code)
1402                 return;
1403         rs->next_vireg = bb->max_ireg;
1404         rs->next_vfreg = bb->max_freg;
1405         mono_regstate_assign (rs);
1406         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1407         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1408         rs->ifree_mask = X86_CALLEE_REGS;
1409
1410         ins = bb->code;
1411
1412         /*if (cfg->opt & MONO_OPT_COPYPROP)
1413                 local_copy_prop (cfg, ins);*/
1414
1415         i = 1;
1416         fpcount = 0;
1417         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1418         /* forward pass on the instructions to collect register liveness info */
1419         while (ins) {
1420                 spec = ins_spec [ins->opcode];
1421                 
1422                 DEBUG (print_ins (i, ins));
1423
1424                 if (spec [MONO_INST_SRC1]) {
1425                         if (spec [MONO_INST_SRC1] == 'f') {
1426                                 GList *spill;
1427                                 reginfo1 = reginfof;
1428
1429                                 spill = g_list_first (fspill_list);
1430                                 if (spill && fpcount < MONO_MAX_FREGS) {
1431                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1432                                         fspill_list = g_list_remove (fspill_list, spill->data);
1433                                 } else
1434                                         fpcount--;
1435                         }
1436                         else
1437                                 reginfo1 = reginfo;
1438                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1439                         reginfo1 [ins->sreg1].last_use = i;
1440                         if (spec [MONO_INST_SRC1] == 'L') {
1441                                 /* The virtual register is allocated sequentially */
1442                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1443                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1444                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1445                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1446
1447                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1448                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1449                         }
1450                 } else {
1451                         ins->sreg1 = -1;
1452                 }
1453                 if (spec [MONO_INST_SRC2]) {
1454                         if (spec [MONO_INST_SRC2] == 'f') {
1455                                 GList *spill;
1456                                 reginfo2 = reginfof;
1457                                 spill = g_list_first (fspill_list);
1458                                 if (spill) {
1459                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1460                                         fspill_list = g_list_remove (fspill_list, spill->data);
1461                                         if (fpcount >= MONO_MAX_FREGS) {
1462                                                 fspill++;
1463                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1464                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1465                                         }
1466                                 } else
1467                                         fpcount--;
1468                         }
1469                         else
1470                                 reginfo2 = reginfo;
1471                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1472                         reginfo2 [ins->sreg2].last_use = i;
1473                         if (spec [MONO_INST_SRC2] == 'L') {
1474                                 /* The virtual register is allocated sequentially */
1475                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1476                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1477                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1478                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1479                         }
1480                         if (spec [MONO_INST_CLOB] == 's') {
1481                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1482                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1483                         }
1484                 } else {
1485                         ins->sreg2 = -1;
1486                 }
1487                 if (spec [MONO_INST_DEST]) {
1488                         if (spec [MONO_INST_DEST] == 'f') {
1489                                 reginfod = reginfof;
1490                                 if (fpcount >= MONO_MAX_FREGS) {
1491                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1492                                         fspill++;
1493                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1494                                         fpcount--;
1495                                 }
1496                                 fpcount++;
1497                         }
1498                         else
1499                                 reginfod = reginfo;
1500                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1501                                 reginfod [ins->dreg].killed_in = i;
1502                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1503                         reginfod [ins->dreg].last_use = i;
1504                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1505                                 reginfod [ins->dreg].born_in = i;
1506                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
1507                                 /* The virtual register is allocated sequentially */
1508                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1509                                 reginfod [ins->dreg + 1].last_use = i;
1510                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1511                                         reginfod [ins->dreg + 1].born_in = i;
1512
1513                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
1514                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
1515                         }
1516                 } else {
1517                         ins->dreg = -1;
1518                 }
1519
1520                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
1521                 ++i;
1522                 ins = ins->next;
1523         }
1524
1525         // todo: check if we have anything left on fp stack, in verify mode?
1526         fspill = 0;
1527
1528         DEBUG (print_regtrack (reginfo, rs->next_vireg));
1529         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
1530         tmp = reversed;
1531         while (tmp) {
1532                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
1533                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
1534                 --i;
1535                 ins = tmp->data;
1536                 spec = ins_spec [ins->opcode];
1537                 prev_dreg = -1;
1538                 clob_dreg = -1;
1539                 DEBUG (g_print ("processing:"));
1540                 DEBUG (print_ins (i, ins));
1541                 if (spec [MONO_INST_CLOB] == 's') {
1542                         if (rs->ifree_mask & (1 << X86_ECX)) {
1543                                 DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
1544                                 rs->iassign [ins->sreg2] = X86_ECX;
1545                                 rs->isymbolic [X86_ECX] = ins->sreg2;
1546                                 ins->sreg2 = X86_ECX;
1547                                 rs->ifree_mask &= ~ (1 << X86_ECX);
1548                         } else {
1549                                 int need_ecx_spill = TRUE;
1550                                 /* 
1551                                  * we first check if src1/dreg is already assigned a register
1552                                  * and then we force a spill of the var assigned to ECX.
1553                                  */
1554                                 /* the destination register can't be ECX */
1555                                 dest_mask &= ~ (1 << X86_ECX);
1556                                 src1_mask &= ~ (1 << X86_ECX);
1557                                 val = rs->iassign [ins->dreg];
1558                                 /* 
1559                                  * the destination register is already assigned to ECX:
1560                                  * we need to allocate another register for it and then
1561                                  * copy from this to ECX.
1562                                  */
1563                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
1564                                         int new_dest;
1565                                         new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
1566                                         g_assert (new_dest >= 0);
1567                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
1568
1569                                         rs->isymbolic [new_dest] = ins->dreg;
1570                                         rs->iassign [ins->dreg] = new_dest;
1571                                         clob_dreg = ins->dreg;
1572                                         ins->dreg = new_dest;
1573                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
1574                                         need_ecx_spill = FALSE;
1575                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
1576                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
1577                                         rs->iassign [ins->dreg] = val;
1578                                         rs->isymbolic [val] = prev_dreg;
1579                                         ins->dreg = val;*/
1580                                 }
1581                                 val = rs->iassign [ins->sreg1];
1582                                 if (val == X86_ECX) {
1583                                         g_assert_not_reached ();
1584                                 } else if (val >= 0) {
1585                                         /* 
1586                                          * the first src reg was already assigned to a register,
1587                                          * we need to copy it to the dest register because the 
1588                                          * shift instruction clobbers the first operand.
1589                                          */
1590                                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, val, NULL);
1591                                         DEBUG (g_print ("\tclob:s moved sreg1 from R%d to R%d\n", val, ins->dreg));
1592                                         insert_before_ins (ins, tmp, copy);
1593                                 }
1594                                 val = rs->iassign [ins->sreg2];
1595                                 if (val >= 0 && val != X86_ECX) {
1596                                         MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
1597                                         DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
1598                                         move->next = ins;
1599                                         g_assert_not_reached ();
1600                                         /* FIXME: where is move connected to the instruction list? */
1601                                         //tmp->prev->data->next = move;
1602                                 }
1603                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
1604                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
1605                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
1606                                         mono_regstate_free_int (rs, X86_ECX);
1607                                 }
1608                                 /* force-set sreg2 */
1609                                 rs->iassign [ins->sreg2] = X86_ECX;
1610                                 rs->isymbolic [X86_ECX] = ins->sreg2;
1611                                 ins->sreg2 = X86_ECX;
1612                                 rs->ifree_mask &= ~ (1 << X86_ECX);
1613                         }
1614                 } else if (spec [MONO_INST_CLOB] == 'd') { /* division */
1615                         int dest_reg = X86_EAX;
1616                         int clob_reg = X86_EDX;
1617                         if (spec [MONO_INST_DEST] == 'd') {
1618                                 dest_reg = X86_EDX; /* remainder */
1619                                 clob_reg = X86_EAX;
1620                         }
1621                         val = rs->iassign [ins->dreg];
1622                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
1623                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
1624                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
1625                                 mono_regstate_free_int (rs, dest_reg);
1626                         }
1627                         if (val < 0) {
1628                                 if (val < -1) {
1629                                         /* the register gets spilled after this inst */
1630                                         int spill = -val -1;
1631                                         dest_mask = 1 << clob_reg;
1632                                         prev_dreg = ins->dreg;
1633                                         val = mono_regstate_alloc_int (rs, dest_mask);
1634                                         if (val < 0)
1635                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1636                                         rs->iassign [ins->dreg] = val;
1637                                         if (spill)
1638                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
1639                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
1640                                         rs->isymbolic [val] = prev_dreg;
1641                                         ins->dreg = val;
1642                                         if (val != dest_reg) { /* force a copy */
1643                                                 create_copy_ins (cfg, val, dest_reg, ins);
1644                                         }
1645                                 } else {
1646                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
1647                                         prev_dreg = ins->dreg;
1648                                         rs->iassign [ins->dreg] = dest_reg;
1649                                         rs->isymbolic [dest_reg] = ins->dreg;
1650                                         ins->dreg = dest_reg;
1651                                         rs->ifree_mask &= ~ (1 << dest_reg);
1652                                 }
1653                         } else {
1654                                 //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
1655                                 if (val != dest_reg) { /* force a copy */
1656                                         create_copy_ins (cfg, val, dest_reg, ins);
1657                                         if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
1658                                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
1659                                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
1660                                                 mono_regstate_free_int (rs, dest_reg);
1661                                         }
1662                                 }
1663                         }
1664                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
1665                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
1666                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
1667                                 mono_regstate_free_int (rs, clob_reg);
1668                         }
1669                         src1_mask = 1 << X86_EAX;
1670                         src2_mask = 1 << X86_ECX;
1671                 }
1672                 if (spec [MONO_INST_DEST] == 'l') {
1673                         int hreg;
1674                         val = rs->iassign [ins->dreg];
1675                         /* check special case when dreg has been moved from ecx (clob shift) */
1676                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
1677                                 hreg = clob_dreg + 1;
1678                         else
1679                                 hreg = ins->dreg + 1;
1680
1681                         /* base prev_dreg on fixed hreg, handle clob case */
1682                         val = hreg - 1;
1683
1684                         if (val != rs->isymbolic [X86_EAX] && !(rs->ifree_mask & (1 << X86_EAX))) {
1685                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [X86_EAX]));
1686                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
1687                                 mono_regstate_free_int (rs, X86_EAX);
1688                         }
1689                         if (hreg != rs->isymbolic [X86_EDX] && !(rs->ifree_mask & (1 << X86_EDX))) {
1690                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [X86_EDX]));
1691                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
1692                                 mono_regstate_free_int (rs, X86_EDX);
1693                         }
1694                 }
1695
1696                 /* Track dreg */
1697                 if (spec [MONO_INST_DEST] == 'f') {
1698                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
1699                                 GList *spill_node;
1700                                 MonoInst *store;
1701                                 spill_node = g_list_first (fspill_list);
1702                                 g_assert (spill_node);
1703
1704                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
1705                                 insert_before_ins (ins, tmp, store);
1706                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
1707                                 fspill--;
1708                         }
1709                 } else if (spec [MONO_INST_DEST] == 'L') {
1710                         int hreg;
1711                         val = rs->iassign [ins->dreg];
1712                         /* check special case when dreg has been moved from ecx (clob shift) */
1713                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
1714                                 hreg = clob_dreg + 1;
1715                         else
1716                                 hreg = ins->dreg + 1;
1717
1718                         /* base prev_dreg on fixed hreg, handle clob case */
1719                         prev_dreg = hreg - 1;
1720
1721                         if (val < 0) {
1722                                 int spill = 0;
1723                                 if (val < -1) {
1724                                         /* the register gets spilled after this inst */
1725                                         spill = -val -1;
1726                                 }
1727                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
1728                                 rs->iassign [ins->dreg] = val;
1729                                 if (spill)
1730                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
1731                         }
1732
1733                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
1734  
1735                         rs->isymbolic [val] = hreg - 1;
1736                         ins->dreg = val;
1737                         
1738                         val = rs->iassign [hreg];
1739                         if (val < 0) {
1740                                 int spill = 0;
1741                                 if (val < -1) {
1742                                         /* the register gets spilled after this inst */
1743                                         spill = -val -1;
1744                                 }
1745                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
1746                                 rs->iassign [hreg] = val;
1747                                 if (spill)
1748                                         create_spilled_store (cfg, spill, val, hreg, ins);
1749                         }
1750
1751                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
1752                         rs->isymbolic [val] = hreg;
1753                         /* save reg allocating into unused */
1754                         ins->unused = val;
1755
1756                         /* check if we can free our long reg */
1757                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
1758                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
1759                                 mono_regstate_free_int (rs, val);
1760                         }
1761                 }
1762                 else if (ins->dreg >= MONO_MAX_IREGS) {
1763                         int hreg;
1764                         val = rs->iassign [ins->dreg];
1765                         if (spec [MONO_INST_DEST] == 'l') {
1766                                 /* check special case when dreg has been moved from ecx (clob shift) */
1767                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
1768                                         hreg = clob_dreg + 1;
1769                                 else
1770                                         hreg = ins->dreg + 1;
1771
1772                                 /* base prev_dreg on fixed hreg, handle clob case */
1773                                 prev_dreg = hreg - 1;
1774                         } else
1775                                 prev_dreg = ins->dreg;
1776
1777                         if (val < 0) {
1778                                 int spill = 0;
1779                                 if (val < -1) {
1780                                         /* the register gets spilled after this inst */
1781                                         spill = -val -1;
1782                                 }
1783                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
1784                                 rs->iassign [ins->dreg] = val;
1785                                 if (spill)
1786                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
1787                         }
1788                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
1789                         rs->isymbolic [val] = prev_dreg;
1790                         ins->dreg = val;
1791                         /* handle cases where lreg needs to be eax:edx */
1792                         if (spec [MONO_INST_DEST] == 'l') {
1793                                 /* check special case when dreg have been moved from ecx (clob shift) */
1794                                 int hreg = prev_dreg + 1;
1795                                 val = rs->iassign [hreg];
1796                                 if (val < 0) {
1797                                         int spill = 0;
1798                                         if (val < -1) {
1799                                                 /* the register gets spilled after this inst */
1800                                                 spill = -val -1;
1801                                         }
1802                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
1803                                         rs->iassign [hreg] = val;
1804                                         if (spill)
1805                                                 create_spilled_store (cfg, spill, val, hreg, ins);
1806                                 }
1807                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
1808                                 rs->isymbolic [val] = hreg;
1809                                 if (ins->dreg == X86_EAX) {
1810                                         if (val != X86_EDX)
1811                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1812                                 } else if (ins->dreg == X86_EDX) {
1813                                         if (val == X86_EAX) {
1814                                                 /* swap */
1815                                                 g_assert_not_reached ();
1816                                         } else {
1817                                                 /* two forced copies */
1818                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1819                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1820                                         }
1821                                 } else {
1822                                         if (val == X86_EDX) {
1823                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1824                                         } else {
1825                                                 /* two forced copies */
1826                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1827                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1828                                         }
1829                                 }
1830                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
1831                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
1832                                         mono_regstate_free_int (rs, val);
1833                                 }
1834                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
1835                                 /* this instruction only outputs to EAX, need to copy */
1836                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1837                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
1838                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
1839                         }
1840                 }
1841                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
1842                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
1843                         mono_regstate_free_int (rs, ins->dreg);
1844                 }
1845                 /* put src1 in EAX if it needs to be */
1846                 if (spec [MONO_INST_SRC1] == 'a') {
1847                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
1848                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
1849                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
1850                                 mono_regstate_free_int (rs, X86_EAX);
1851                         }
1852                         /* force-set sreg1 */
1853                         rs->iassign [ins->sreg1] = X86_EAX;
1854                         rs->isymbolic [X86_EAX] = ins->sreg1;
1855                         ins->sreg1 = X86_EAX;
1856                         rs->ifree_mask &= ~ (1 << X86_EAX);
1857                 }
1858
1859                 /* Track sreg1 */
1860                 if (spec [MONO_INST_SRC1] == 'f') {
1861                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
1862                                 MonoInst *load;
1863                                 MonoInst *store = NULL;
1864
1865                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
1866                                         GList *spill_node;
1867                                         spill_node = g_list_first (fspill_list);
1868                                         g_assert (spill_node);
1869
1870                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
1871                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
1872                                 }
1873
1874                                 fspill++;
1875                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1876                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
1877                                 insert_before_ins (ins, tmp, load);
1878                                 if (store) 
1879                                         insert_before_ins (load, tmp, store);
1880                         }
1881                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
1882                         /* force source to be same as dest */
1883                         rs->iassign [ins->sreg1] = ins->dreg;
1884                         rs->iassign [ins->sreg1 + 1] = ins->unused;
1885                         rs->isymbolic [ins->dreg] = ins->sreg1;
1886                         rs->isymbolic [ins->unused] = ins->sreg1 + 1;
1887
1888                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
1889                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
1890
1891                         ins->sreg1 = ins->dreg;
1892                         /* 
1893                          * No need for saving the reg, we know that src1=dest in this cases
1894                          * ins->inst_c0 = ins->unused;
1895                          */
1896
1897                         /* make sure that we remove them from free mask */
1898                         rs->ifree_mask &= ~ (1 << ins->dreg);
1899                         rs->ifree_mask &= ~ (1 << ins->unused);
1900                 }
1901                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
1902                         val = rs->iassign [ins->sreg1];
1903                         prev_sreg1 = ins->sreg1;
1904                         if (val < 0) {
1905                                 int spill = 0;
1906                                 if (val < -1) {
1907                                         /* the register gets spilled after this inst */
1908                                         spill = -val -1;
1909                                 }
1910                                 if (0 && ins->opcode == OP_MOVE) {
1911                                         /* 
1912                                          * small optimization: the dest register is already allocated
1913                                          * but the src one is not: we can simply assign the same register
1914                                          * here and peephole will get rid of the instruction later.
1915                                          * This optimization may interfere with the clobbering handling:
1916                                          * it removes a mov operation that will be added again to handle clobbering.
1917                                          * There are also some other issues that show up with make testjit.
1918                                          */
1919                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
1920                                         val = rs->iassign [ins->sreg1] = ins->dreg;
1921                                         //g_assert (val >= 0);
1922                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
1923                                 } else {
1924                                         //g_assert (val == -1); /* source cannot be spilled */
1925                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
1926                                         rs->iassign [ins->sreg1] = val;
1927                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
1928                                 }
1929                                 if (spill) {
1930                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
1931                                         insert_before_ins (ins, tmp, store);
1932                                 }
1933                         }
1934                         rs->isymbolic [val] = prev_sreg1;
1935                         ins->sreg1 = val;
1936                 } else {
1937                         prev_sreg1 = -1;
1938                 }
1939                 /* handle clobbering of sreg1 */
1940                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
1941                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
1942                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
1943                         if (ins->sreg2 == -1 || spec [MONO_INST_CLOB] == 's') {
1944                                 /* note: the copy is inserted before the current instruction! */
1945                                 insert_before_ins (ins, tmp, copy);
1946                                 /* we set sreg1 to dest as well */
1947                                 prev_sreg1 = ins->sreg1 = ins->dreg;
1948                         } else {
1949                                 /* inserted after the operation */
1950                                 copy->next = ins->next;
1951                                 ins->next = copy;
1952                         }
1953                 }
1954                 /* track sreg2 */
1955                 if (spec [MONO_INST_SRC2] == 'f') {
1956                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
1957                                 MonoInst *load;
1958                                 MonoInst *store = NULL;
1959
1960                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
1961                                         GList *spill_node;
1962
1963                                         spill_node = g_list_first (fspill_list);
1964                                         g_assert (spill_node);
1965                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
1966                                                 spill_node = g_list_next (spill_node);
1967         
1968                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
1969                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
1970                                 } 
1971                                 
1972                                 fspill++;
1973                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1974                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
1975                                 insert_before_ins (ins, tmp, load);
1976                                 if (store) 
1977                                         insert_before_ins (load, tmp, store);
1978                         }
1979                 } 
1980                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
1981                         val = rs->iassign [ins->sreg2];
1982                         prev_sreg2 = ins->sreg2;
1983                         if (val < 0) {
1984                                 int spill = 0;
1985                                 if (val < -1) {
1986                                         /* the register gets spilled after this inst */
1987                                         spill = -val -1;
1988                                 }
1989                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
1990                                 rs->iassign [ins->sreg2] = val;
1991                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
1992                                 if (spill)
1993                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
1994                         }
1995                         rs->isymbolic [val] = prev_sreg2;
1996                         ins->sreg2 = val;
1997                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
1998                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
1999                         }
2000                 } else {
2001                         prev_sreg2 = -1;
2002                 }
2003
2004                 if (spec [MONO_INST_CLOB] == 'c') {
2005                         int j, s;
2006                         guint32 clob_mask = X86_CALLEE_REGS;
2007                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2008                                 s = 1 << j;
2009                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2010                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2011                                 }
2012                         }
2013                 }
2014                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2015                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2016                         mono_regstate_free_int (rs, ins->sreg1);
2017                 }
2018                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2019                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2020                         mono_regstate_free_int (rs, ins->sreg2);
2021                 }*/
2022         
2023                 //DEBUG (print_ins (i, ins));
2024                 /* this may result from a insert_before call */
2025                 if (!tmp->next)
2026                         bb->code = tmp->data;
2027                 tmp = tmp->next;
2028         }
2029
2030         g_free (reginfo);
2031         g_free (reginfof);
2032         g_list_free (fspill_list);
2033 }
2034
2035 static unsigned char*
2036 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
2037 {
2038         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2039         x86_fnstcw_membase(code, X86_ESP, 0);
2040         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
2041         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
2042         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
2043         x86_fldcw_membase (code, X86_ESP, 2);
2044         if (size == 8) {
2045                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2046                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2047                 x86_pop_reg (code, dreg);
2048                 /* FIXME: need the high register 
2049                  * x86_pop_reg (code, dreg_high);
2050                  */
2051         } else {
2052                 x86_push_reg (code, X86_EAX); // SP = SP - 4
2053                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
2054                 x86_pop_reg (code, dreg);
2055         }
2056         x86_fldcw_membase (code, X86_ESP, 0);
2057         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2058
2059         if (size == 1)
2060                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
2061         else if (size == 2)
2062                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
2063         return code;
2064 }
2065
2066 static unsigned char*
2067 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2068 {
2069         int sreg = tree->sreg1;
2070 #ifdef PLATFORM_WIN32
2071         guint8* br[5];
2072
2073         /*
2074          * Under Windows:
2075          * If requested stack size is larger than one page,
2076          * perform stack-touch operation
2077          */
2078         /*
2079          * Generate stack probe code.
2080          * Under Windows, it is necessary to allocate one page at a time,
2081          * "touching" stack after each successful sub-allocation. This is
2082          * because of the way stack growth is implemented - there is a
2083          * guard page before the lowest stack page that is currently commited.
2084          * Stack normally grows sequentially so OS traps access to the
2085          * guard page and commits more pages when needed.
2086          */
2087         x86_test_reg_imm (code, sreg, ~0xFFF);
2088         br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2089
2090         br[2] = code; /* loop */
2091         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
2092         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
2093         x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2094         x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2095         br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2096         x86_patch (br[3], br[2]);
2097         x86_test_reg_reg (code, sreg, sreg);
2098         br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2099         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2100
2101         br[1] = code; x86_jump8 (code, 0);
2102
2103         x86_patch (br[0], code);
2104         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2105         x86_patch (br[1], code);
2106         x86_patch (br[4], code);
2107 #else /* PLATFORM_WIN32 */
2108         x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
2109 #endif
2110         if (tree->flags & MONO_INST_INIT) {
2111                 int offset = 0;
2112                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
2113                         x86_push_reg (code, X86_EAX);
2114                         offset += 4;
2115                 }
2116                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
2117                         x86_push_reg (code, X86_ECX);
2118                         offset += 4;
2119                 }
2120                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
2121                         x86_push_reg (code, X86_EDI);
2122                         offset += 4;
2123                 }
2124                 
2125                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
2126                 if (sreg != X86_ECX)
2127                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
2128                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
2129                                 
2130                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
2131                 x86_cld (code);
2132                 x86_prefix (code, X86_REP_PREFIX);
2133                 x86_stosl (code);
2134                 
2135                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
2136                         x86_pop_reg (code, X86_EDI);
2137                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
2138                         x86_pop_reg (code, X86_ECX);
2139                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
2140                         x86_pop_reg (code, X86_EAX);
2141         }
2142         return code;
2143 }
2144
/*
 * REAL_PRINT_REG:
 * @text: a string literal, it is pasted in front of the format " %d %p\n"
 * @reg: the register to print
 *
 * Debugging aid: emits code which calls printf to print @text, the number
 * of @reg and the run time contents of @reg. EAX, EDX and ECX are saved
 * around the call. A variable named code, pointing to the place where the
 * native code is emitted, must be in scope.
 * The expansion is a list of statements ending with a semicolon, so it
 * must not be used as the body of an if/else/loop without braces.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert ((reg) >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, (reg)); \
x86_push_imm (code, (reg)); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
2159
/*
 * Alignment, in bytes, of the native code of loop starts: must be a power
 * of two. The value should be benchmarked and set based on the cpu.
 */
#define LOOP_ALIGNMENT 8
/* non-zero if the basic block has loop_body_start set and a non-zero nesting */
#define bb_is_loop_start(bb) ((bb)->loop_body_start != 0 && (bb)->nesting != 0)
2163
2164 void
2165 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2166 {
2167         MonoInst *ins;
2168         MonoCallInst *call;
2169         guint offset;
2170         guint8 *code = cfg->native_code + cfg->code_len;
2171         MonoInst *last_ins = NULL;
2172         guint last_offset = 0;
2173         int max_len, cpos;
2174
2175         if (cfg->opt & MONO_OPT_PEEPHOLE)
2176                 peephole_pass (cfg, bb);
2177
2178         if (cfg->opt & MONO_OPT_LOOP) {
2179                 int pad, align = LOOP_ALIGNMENT;
2180                 /* set alignment depending on cpu */
2181                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2182                         pad = align - pad;
2183                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2184                         x86_padding (code, pad);
2185                         cfg->code_len += pad;
2186                         bb->native_offset = cfg->code_len;
2187                 }
2188         }
2189
2190         if (cfg->verbose_level > 2)
2191                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2192
2193         cpos = bb->max_offset;
2194
2195         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2196                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2197                 g_assert (!mono_compile_aot);
2198                 cpos += 6;
2199
2200                 cov->data [bb->dfn].cil_code = bb->cil_code;
2201                 /* this is not thread save, but good enough */
2202                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2203         }
2204
2205         offset = code - cfg->native_code;
2206
2207         ins = bb->code;
2208         while (ins) {
2209                 offset = code - cfg->native_code;
2210
2211                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2212
2213                 if (offset > (cfg->code_size - max_len - 16)) {
2214                         cfg->code_size *= 2;
2215                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2216                         code = cfg->native_code + offset;
2217                         mono_jit_stats.code_reallocs++;
2218                 }
2219
2220                 mono_debug_record_line_number (cfg, ins, offset);
2221
2222                 switch (ins->opcode) {
2223                 case OP_BIGMUL:
2224                         x86_mul_reg (code, ins->sreg2, TRUE);
2225                         break;
2226                 case OP_BIGMUL_UN:
2227                         x86_mul_reg (code, ins->sreg2, FALSE);
2228                         break;
2229                 case OP_X86_SETEQ_MEMBASE:
2230                 case OP_X86_SETNE_MEMBASE:
2231                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2232                                          ins->inst_basereg, ins->inst_offset, TRUE);
2233                         break;
2234                 case OP_STOREI1_MEMBASE_IMM:
2235                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2236                         break;
2237                 case OP_STOREI2_MEMBASE_IMM:
2238                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2239                         break;
2240                 case OP_STORE_MEMBASE_IMM:
2241                 case OP_STOREI4_MEMBASE_IMM:
2242                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2243                         break;
2244                 case OP_STOREI1_MEMBASE_REG:
2245                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2246                         break;
2247                 case OP_STOREI2_MEMBASE_REG:
2248                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2249                         break;
2250                 case OP_STORE_MEMBASE_REG:
2251                 case OP_STOREI4_MEMBASE_REG:
2252                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2253                         break;
2254                 case CEE_LDIND_I:
2255                 case CEE_LDIND_I4:
2256                 case CEE_LDIND_U4:
2257                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2258                         break;
2259                 case OP_LOADU4_MEM:
2260                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2261                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2262                         break;
2263                 case OP_LOAD_MEMBASE:
2264                 case OP_LOADI4_MEMBASE:
2265                 case OP_LOADU4_MEMBASE:
2266                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2267                         break;
2268                 case OP_LOADU1_MEMBASE:
2269                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2270                         break;
2271                 case OP_LOADI1_MEMBASE:
2272                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2273                         break;
2274                 case OP_LOADU2_MEMBASE:
2275                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2276                         break;
2277                 case OP_LOADI2_MEMBASE:
2278                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2279                         break;
2280                 case CEE_CONV_I1:
2281                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2282                         break;
2283                 case CEE_CONV_I2:
2284                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2285                         break;
2286                 case CEE_CONV_U1:
2287                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2288                         break;
2289                 case CEE_CONV_U2:
2290                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2291                         break;
2292                 case OP_COMPARE:
2293                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2294                         break;
2295                 case OP_COMPARE_IMM:
2296                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2297                         break;
2298                 case OP_X86_COMPARE_MEMBASE_REG:
2299                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2300                         break;
2301                 case OP_X86_COMPARE_MEMBASE_IMM:
2302                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2303                         break;
2304                 case OP_X86_COMPARE_MEMBASE8_IMM:
2305                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2306                         break;
2307                 case OP_X86_COMPARE_REG_MEMBASE:
2308                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2309                         break;
2310                 case OP_X86_TEST_NULL:
2311                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2312                         break;
2313                 case OP_X86_ADD_MEMBASE_IMM:
2314                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2315                         break;
2316                 case OP_X86_ADD_MEMBASE:
2317                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2318                         break;
2319                 case OP_X86_SUB_MEMBASE_IMM:
2320                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2321                         break;
2322                 case OP_X86_SUB_MEMBASE:
2323                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2324                         break;
2325                 case OP_X86_INC_MEMBASE:
2326                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2327                         break;
2328                 case OP_X86_INC_REG:
2329                         x86_inc_reg (code, ins->dreg);
2330                         break;
2331                 case OP_X86_DEC_MEMBASE:
2332                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2333                         break;
2334                 case OP_X86_DEC_REG:
2335                         x86_dec_reg (code, ins->dreg);
2336                         break;
2337                 case OP_X86_MUL_MEMBASE:
2338                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2339                         break;
2340                 case CEE_BREAK:
2341                         x86_breakpoint (code);
2342                         break;
2343                 case OP_ADDCC:
2344                 case CEE_ADD:
2345                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2346                         break;
2347                 case OP_ADC:
2348                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2349                         break;
2350                 case OP_ADDCC_IMM:
2351                 case OP_ADD_IMM:
2352                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2353                         break;
2354                 case OP_ADC_IMM:
2355                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2356                         break;
2357                 case OP_SUBCC:
2358                 case CEE_SUB:
2359                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2360                         break;
2361                 case OP_SBB:
2362                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2363                         break;
2364                 case OP_SUBCC_IMM:
2365                 case OP_SUB_IMM:
2366                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2367                         break;
2368                 case OP_SBB_IMM:
2369                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2370                         break;
2371                 case CEE_AND:
2372                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2373                         break;
2374                 case OP_AND_IMM:
2375                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2376                         break;
2377                 case CEE_DIV:
2378                         x86_cdq (code);
2379                         x86_div_reg (code, ins->sreg2, TRUE);
2380                         break;
2381                 case CEE_DIV_UN:
2382                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2383                         x86_div_reg (code, ins->sreg2, FALSE);
2384                         break;
2385                 case OP_DIV_IMM:
2386                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2387                         x86_cdq (code);
2388                         x86_div_reg (code, ins->sreg2, TRUE);
2389                         break;
2390                 case CEE_REM:
2391                         x86_cdq (code);
2392                         x86_div_reg (code, ins->sreg2, TRUE);
2393                         break;
2394                 case CEE_REM_UN:
2395                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2396                         x86_div_reg (code, ins->sreg2, FALSE);
2397                         break;
2398                 case OP_REM_IMM:
2399                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2400                         x86_cdq (code);
2401                         x86_div_reg (code, ins->sreg2, TRUE);
2402                         break;
2403                 case CEE_OR:
2404                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2405                         break;
2406                 case OP_OR_IMM:
2407                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2408                         break;
2409                 case CEE_XOR:
2410                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2411                         break;
2412                 case OP_XOR_IMM:
2413                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2414                         break;
2415                 case CEE_SHL:
2416                         g_assert (ins->sreg2 == X86_ECX);
2417                         x86_shift_reg (code, X86_SHL, ins->dreg);
2418                         break;
2419                 case CEE_SHR:
2420                         g_assert (ins->sreg2 == X86_ECX);
2421                         x86_shift_reg (code, X86_SAR, ins->dreg);
2422                         break;
2423                 case OP_SHR_IMM:
2424                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2425                         break;
2426                 case OP_SHR_UN_IMM:
2427                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2428                         break;
2429                 case CEE_SHR_UN:
2430                         g_assert (ins->sreg2 == X86_ECX);
2431                         x86_shift_reg (code, X86_SHR, ins->dreg);
2432                         break;
2433                 case OP_SHL_IMM:
2434                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2435                         break;
2436                 case OP_LSHL: {
2437                         guint8 *jump_to_end;
2438
2439                         /* handle shifts below 32 bits */
2440                         x86_shld_reg (code, ins->unused, ins->sreg1);
2441                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2442
2443                         x86_test_reg_imm (code, X86_ECX, 32);
2444                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2445
2446                         /* handle shift over 32 bit */
2447                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2448                         x86_clear_reg (code, ins->sreg1);
2449                         
2450                         x86_patch (jump_to_end, code);
2451                         }
2452                         break;
2453                 case OP_LSHR: {
2454                         guint8 *jump_to_end;
2455
2456                         /* handle shifts below 32 bits */
2457                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2458                         x86_shift_reg (code, X86_SAR, ins->unused);
2459
2460                         x86_test_reg_imm (code, X86_ECX, 32);
2461                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2462
2463                         /* handle shifts over 31 bits */
2464                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2465                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
2466                         
2467                         x86_patch (jump_to_end, code);
2468                         }
2469                         break;
2470                 case OP_LSHR_UN: {
2471                         guint8 *jump_to_end;
2472
2473                         /* handle shifts below 32 bits */
2474                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2475                         x86_shift_reg (code, X86_SHR, ins->unused);
2476
2477                         x86_test_reg_imm (code, X86_ECX, 32);
2478                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2479
2480                         /* handle shifts over 31 bits */
2481                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2482                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
2483                         
2484                         x86_patch (jump_to_end, code);
2485                         }
2486                         break;
2487                 case OP_LSHL_IMM:
2488                         if (ins->inst_imm >= 32) {
2489                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2490                                 x86_clear_reg (code, ins->sreg1);
2491                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
2492                         } else {
2493                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
2494                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2495                         }
2496                         break;
2497                 case OP_LSHR_IMM:
2498                         if (ins->inst_imm >= 32) {
2499                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
2500                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
2501                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2502                         } else {
2503                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2504                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
2505                         }
2506                         break;
2507                 case OP_LSHR_UN_IMM:
2508                         if (ins->inst_imm >= 32) {
2509                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2510                                 x86_clear_reg (code, ins->unused);
2511                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2512                         } else {
2513                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2514                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
2515                         }
2516                         break;
2517                 case CEE_NOT:
2518                         x86_not_reg (code, ins->sreg1);
2519                         break;
2520                 case CEE_NEG:
2521                         x86_neg_reg (code, ins->sreg1);
2522                         break;
2523                 case OP_SEXT_I1:
2524                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2525                         break;
2526                 case OP_SEXT_I2:
2527                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2528                         break;
2529                 case CEE_MUL:
2530                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2531                         break;
2532                 case OP_MUL_IMM:
2533                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2534                         break;
2535                 case CEE_MUL_OVF:
2536                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2537                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2538                         break;
2539                 case CEE_MUL_OVF_UN: {
2540                         /* the mul operation and the exception check should most likely be split */
2541                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2542                         /*g_assert (ins->sreg2 == X86_EAX);
2543                         g_assert (ins->dreg == X86_EAX);*/
2544                         if (ins->sreg2 == X86_EAX) {
2545                                 non_eax_reg = ins->sreg1;
2546                         } else if (ins->sreg1 == X86_EAX) {
2547                                 non_eax_reg = ins->sreg2;
2548                         } else {
2549                                 /* no need to save since we're going to store to it anyway */
2550                                 if (ins->dreg != X86_EAX) {
2551                                         saved_eax = TRUE;
2552                                         x86_push_reg (code, X86_EAX);
2553                                 }
2554                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2555                                 non_eax_reg = ins->sreg2;
2556                         }
2557                         if (ins->dreg == X86_EDX) {
2558                                 if (!saved_eax) {
2559                                         saved_eax = TRUE;
2560                                         x86_push_reg (code, X86_EAX);
2561                                 }
2562                         } else if (ins->dreg != X86_EAX) {
2563                                 saved_edx = TRUE;
2564                                 x86_push_reg (code, X86_EDX);
2565                         }
2566                         x86_mul_reg (code, non_eax_reg, FALSE);
2567                         /* save before the check since pop and mov don't change the flags */
2568                         if (ins->dreg != X86_EAX)
2569                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2570                         if (saved_edx)
2571                                 x86_pop_reg (code, X86_EDX);
2572                         if (saved_eax)
2573                                 x86_pop_reg (code, X86_EAX);
2574                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2575                         break;
2576                 }
2577                 case OP_ICONST:
2578                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2579                         break;
2580                 case OP_AOTCONST:
2581                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2582                         x86_mov_reg_imm (code, ins->dreg, 0);
2583                         break;
2584                 case CEE_CONV_I4:
2585                 case OP_MOVE:
2586                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2587                         break;
2588                 case CEE_CONV_U4:
2589                         g_assert_not_reached ();
2590                 case CEE_JMP: {
2591                         /*
2592                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2593                          * Keep in sync with the code in emit_epilog.
2594                          */
2595                         int pos = 0;
2596
2597                         /* FIXME: no tracing support... */
2598                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2599                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2600                         /* reset offset to make max_len work */
2601                         offset = code - cfg->native_code;
2602
2603                         g_assert (!cfg->method->save_lmf);
2604
2605                         if (cfg->used_int_regs & (1 << X86_EBX))
2606                                 pos -= 4;
2607                         if (cfg->used_int_regs & (1 << X86_EDI))
2608                                 pos -= 4;
2609                         if (cfg->used_int_regs & (1 << X86_ESI))
2610                                 pos -= 4;
2611                         if (pos)
2612                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2613         
2614                         if (cfg->used_int_regs & (1 << X86_ESI))
2615                                 x86_pop_reg (code, X86_ESI);
2616                         if (cfg->used_int_regs & (1 << X86_EDI))
2617                                 x86_pop_reg (code, X86_EDI);
2618                         if (cfg->used_int_regs & (1 << X86_EBX))
2619                                 x86_pop_reg (code, X86_EBX);
2620         
2621                         /* restore ESP/EBP */
2622                         x86_leave (code);
2623                         offset = code - cfg->native_code;
2624                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2625                         x86_jump32 (code, 0);
2626                         break;
2627                 }
2628                 case OP_CHECK_THIS:
2629                         /* ensure ins->sreg1 is not NULL
2630                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2631                          * cmp DWORD PTR [eax], 0
2632                          */
2633                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2634                         break;
2635                 case OP_ARGLIST: {
2636                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2637                         x86_push_reg (code, hreg);
2638                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2639                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2640                         x86_pop_reg (code, hreg);
2641                         break;
2642                 }
2643                 case OP_FCALL:
2644                 case OP_LCALL:
2645                 case OP_VCALL:
2646                 case OP_VOIDCALL:
2647                 case CEE_CALL:
2648                         call = (MonoCallInst*)ins;
2649                         if (ins->flags & MONO_INST_HAS_METHOD)
2650                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method);
2651                         else {
2652                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr);
2653                         }
2654                         x86_call_code (code, 0);
2655                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
2656                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2657                         break;
2658                 case OP_FCALL_REG:
2659                 case OP_LCALL_REG:
2660                 case OP_VCALL_REG:
2661                 case OP_VOIDCALL_REG:
2662                 case OP_CALL_REG:
2663                         call = (MonoCallInst*)ins;
2664                         x86_call_reg (code, ins->sreg1);
2665                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
2666                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2667                         break;
2668                 case OP_FCALL_MEMBASE:
2669                 case OP_LCALL_MEMBASE:
2670                 case OP_VCALL_MEMBASE:
2671                 case OP_VOIDCALL_MEMBASE:
2672                 case OP_CALL_MEMBASE:
2673                         call = (MonoCallInst*)ins;
2674                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2675                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
2676                                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2677                         break;
2678                 case OP_OUTARG:
2679                 case OP_X86_PUSH:
2680                         x86_push_reg (code, ins->sreg1);
2681                         break;
2682                 case OP_X86_PUSH_IMM:
2683                         x86_push_imm (code, ins->inst_imm);
2684                         break;
2685                 case OP_X86_PUSH_MEMBASE:
2686                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2687                         break;
2688                 case OP_X86_PUSH_OBJ: 
2689                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2690                         x86_push_reg (code, X86_EDI);
2691                         x86_push_reg (code, X86_ESI);
2692                         x86_push_reg (code, X86_ECX);
2693                         if (ins->inst_offset)
2694                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2695                         else
2696                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2697                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2698                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2699                         x86_cld (code);
2700                         x86_prefix (code, X86_REP_PREFIX);
2701                         x86_movsd (code);
2702                         x86_pop_reg (code, X86_ECX);
2703                         x86_pop_reg (code, X86_ESI);
2704                         x86_pop_reg (code, X86_EDI);
2705                         break;
2706                 case OP_X86_LEA:
2707                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2708                         break;
2709                 case OP_X86_LEA_MEMBASE:
2710                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2711                         break;
2712                 case OP_X86_XCHG:
2713                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2714                         break;
2715                 case OP_LOCALLOC:
2716                         /* keep alignment */
2717                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2718                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2719                         code = mono_emit_stack_alloc (code, ins);
2720                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2721                         break;
2722                 case CEE_RET:
2723                         x86_ret (code);
2724                         break;
2725                 case CEE_THROW: {
2726                         x86_push_reg (code, ins->sreg1);
2727                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2728                                              (gpointer)"mono_arch_throw_exception");
2729                         x86_call_code (code, 0);
2730                         break;
2731                 }
2732                 case OP_RETHROW: {
2733                         x86_push_reg (code, ins->sreg1);
2734                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2735                                              (gpointer)"mono_arch_rethrow_exception");
2736                         x86_call_code (code, 0);
2737                         break;
2738                 }
2739                 case OP_CALL_HANDLER: 
2740                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2741                         x86_call_imm (code, 0);
2742                         break;
2743                 case OP_LABEL:
2744                         ins->inst_c0 = code - cfg->native_code;
2745                         break;
2746                 case CEE_BR:
2747                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2748                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2749                         //break;
2750                         if (ins->flags & MONO_INST_BRLABEL) {
2751                                 if (ins->inst_i0->inst_c0) {
2752                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2753                                 } else {
2754                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2755                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2756                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2757                                                 x86_jump8 (code, 0);
2758                                         else 
2759                                                 x86_jump32 (code, 0);
2760                                 }
2761                         } else {
2762                                 if (ins->inst_target_bb->native_offset) {
2763                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2764                                 } else {
2765                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2766                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2767                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2768                                                 x86_jump8 (code, 0);
2769                                         else 
2770                                                 x86_jump32 (code, 0);
2771                                 } 
2772                         }
2773                         break;
2774                 case OP_BR_REG:
2775                         x86_jump_reg (code, ins->sreg1);
2776                         break;
2777                 case OP_CEQ:
2778                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2779                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2780                         break;
2781                 case OP_CLT:
2782                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2783                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2784                         break;
2785                 case OP_CLT_UN:
2786                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2787                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2788                         break;
2789                 case OP_CGT:
2790                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2791                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2792                         break;
2793                 case OP_CGT_UN:
2794                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2795                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2796                         break;
2797                 case OP_CNE:
2798                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2799                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2800                         break;
2801                 case OP_COND_EXC_EQ:
2802                 case OP_COND_EXC_NE_UN:
2803                 case OP_COND_EXC_LT:
2804                 case OP_COND_EXC_LT_UN:
2805                 case OP_COND_EXC_GT:
2806                 case OP_COND_EXC_GT_UN:
2807                 case OP_COND_EXC_GE:
2808                 case OP_COND_EXC_GE_UN:
2809                 case OP_COND_EXC_LE:
2810                 case OP_COND_EXC_LE_UN:
2811                 case OP_COND_EXC_OV:
2812                 case OP_COND_EXC_NO:
2813                 case OP_COND_EXC_C:
2814                 case OP_COND_EXC_NC:
2815                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
2816                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2817                         break;
2818                 case CEE_BEQ:
2819                 case CEE_BNE_UN:
2820                 case CEE_BLT:
2821                 case CEE_BLT_UN:
2822                 case CEE_BGT:
2823                 case CEE_BGT_UN:
2824                 case CEE_BGE:
2825                 case CEE_BGE_UN:
2826                 case CEE_BLE:
2827                 case CEE_BLE_UN:
2828                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2829                         break;
2830
2831                 /* floating point opcodes */
2832                 case OP_R8CONST: {
2833                         double d = *(double *)ins->inst_p0;
2834
2835                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2836                                 x86_fldz (code);
2837                         } else if (d == 1.0) {
2838                                 x86_fld1 (code);
2839                         } else {
2840                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
2841                                 x86_fld (code, NULL, TRUE);
2842                         }
2843                         break;
2844                 }
2845                 case OP_R4CONST: {
2846                         float f = *(float *)ins->inst_p0;
2847
2848                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2849                                 x86_fldz (code);
2850                         } else if (f == 1.0) {
2851                                 x86_fld1 (code);
2852                         } else {
2853                                 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
2854                                 x86_fld (code, NULL, FALSE);
2855                         }
2856                         break;
2857                 }
2858                 case OP_STORER8_MEMBASE_REG:
2859                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2860                         break;
2861                 case OP_LOADR8_SPILL_MEMBASE:
2862                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2863                         x86_fxch (code, 1);
2864                         break;
2865                 case OP_LOADR8_MEMBASE:
2866                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2867                         break;
2868                 case OP_STORER4_MEMBASE_REG:
2869                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2870                         break;
2871                 case OP_LOADR4_MEMBASE:
2872                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2873                         break;
2874                 case CEE_CONV_R4: /* FIXME: change precision */
2875                 case CEE_CONV_R8:
2876                         x86_push_reg (code, ins->sreg1);
2877                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2878                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2879                         break;
2880                 case OP_X86_FP_LOAD_I8:
2881                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2882                         break;
2883                 case OP_X86_FP_LOAD_I4:
2884                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2885                         break;
2886                 case OP_FCONV_TO_I1:
2887                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2888                         break;
2889                 case OP_FCONV_TO_U1:
2890                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2891                         break;
2892                 case OP_FCONV_TO_I2:
2893                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2894                         break;
2895                 case OP_FCONV_TO_U2:
2896                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2897                         break;
2898                 case OP_FCONV_TO_I4:
2899                 case OP_FCONV_TO_I:
2900                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2901                         break;
2902                 case OP_FCONV_TO_I8:
2903                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2904                         x86_fnstcw_membase(code, X86_ESP, 0);
2905                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2906                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2907                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2908                         x86_fldcw_membase (code, X86_ESP, 2);
2909                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2910                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2911                         x86_pop_reg (code, ins->dreg);
2912                         x86_pop_reg (code, ins->unused);
2913                         x86_fldcw_membase (code, X86_ESP, 0);
2914                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2915                         break;
2916                 case OP_LCONV_TO_R_UN: { 
2917                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2918                         guint8 *br;
2919
2920                         /* load 64bit integer to FP stack */
2921                         x86_push_imm (code, 0);
2922                         x86_push_reg (code, ins->sreg2);
2923                         x86_push_reg (code, ins->sreg1);
2924                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2925                         /* store as 80bit FP value */
2926                         x86_fst80_membase (code, X86_ESP, 0);
2927                         
2928                         /* test if lreg is negative */
2929                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2930                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2931         
2932                         /* add correction constant mn */
2933                         x86_fld80_mem (code, mn);
2934                         x86_fld80_membase (code, X86_ESP, 0);
2935                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2936                         x86_fst80_membase (code, X86_ESP, 0);
2937
2938                         x86_patch (br, code);
2939
2940                         x86_fld80_membase (code, X86_ESP, 0);
2941                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2942
2943                         break;
2944                 }
2945                 case OP_LCONV_TO_OVF_I: {
2946                         guint8 *br [3], *label [1];
2947
2948                         /* 
2949                          * Valid ints: 0xffffffff:0x80000000 to 0x00000000:0x7fffffff
2950                          */
2951                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2952
2953                         /* If the low word top bit is set, see if we are negative */
2954                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2955                         /* We are not negative (no top bit set), check for our top word to be zero */
2956                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2957                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2958                         label [0] = code;
2959
2960                         /* throw exception */
2961                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2962                         x86_jump32 (code, 0);
2963         
2964                         x86_patch (br [0], code);
2965                         /* our top bit is set, check that top word is 0xffffffff */
2966                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2967                 
2968                         x86_patch (br [1], code);
2969                         /* nope, emit exception */
2970                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2971                         x86_patch (br [2], label [0]);
2972
2973                         if (ins->dreg != ins->sreg1)
2974                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2975                         break;
2976                 }
2977                 case OP_FADD:
2978                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2979                         break;
2980                 case OP_FSUB:
2981                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2982                         break;          
2983                 case OP_FMUL:
2984                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2985                         break;          
2986                 case OP_FDIV:
2987                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2988                         break;          
2989                 case OP_FNEG:
2990                         x86_fchs (code);
2991                         break;          
2992                 case OP_SIN:
2993                         x86_fsin (code);
2994                         x86_fldz (code);
2995                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2996                         break;          
2997                 case OP_COS:
2998                         x86_fcos (code);
2999                         x86_fldz (code);
3000                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3001                         break;          
3002                 case OP_ABS:
3003                         x86_fabs (code);
3004                         break;          
3005                 case OP_TAN: {
3006                         /* 
3007                          * it really doesn't make sense to inline all this code,
3008                          * it's here just to show that things may not be as simple 
3009                          * as they appear.
3010                          */
3011                         guchar *check_pos, *end_tan, *pop_jump;
3012                         x86_push_reg (code, X86_EAX);
3013                         x86_fptan (code);
3014                         x86_fnstsw (code);
3015                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3016                         check_pos = code;
3017                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3018                         x86_fstp (code, 0); /* pop the 1.0 */
3019                         end_tan = code;
3020                         x86_jump8 (code, 0);
3021                         x86_fldpi (code);
3022                         x86_fp_op (code, X86_FADD, 0);
3023                         x86_fxch (code, 1);
3024                         x86_fprem1 (code);
3025                         x86_fstsw (code);
3026                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3027                         pop_jump = code;
3028                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3029                         x86_fstp (code, 1);
3030                         x86_fptan (code);
3031                         x86_patch (pop_jump, code);
3032                         x86_fstp (code, 0); /* pop the 1.0 */
3033                         x86_patch (check_pos, code);
3034                         x86_patch (end_tan, code);
3035                         x86_fldz (code);
3036                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3037                         x86_pop_reg (code, X86_EAX);
3038                         break;
3039                 }
3040                 case OP_ATAN:
3041                         x86_fld1 (code);
3042                         x86_fpatan (code);
3043                         x86_fldz (code);
3044                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3045                         break;          
3046                 case OP_SQRT:
3047                         x86_fsqrt (code);
3048                         break;          
3049                 case OP_X86_FPOP:
3050                         x86_fstp (code, 0);
3051                         break;          
3052                 case OP_FREM: {
3053                         guint8 *l1, *l2;
3054
3055                         x86_push_reg (code, X86_EAX);
3056                         /* we need to exchange ST(0) with ST(1) */
3057                         x86_fxch (code, 1);
3058
3059                         /* this requires a loop, because fprem sometimes 
3060                          * returns a partial remainder */
3061                         l1 = code;
3062                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3063                         /* x86_fprem1 (code); */
3064                         x86_fprem (code);
3065                         x86_fnstsw (code);
3066                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3067                         l2 = code + 2;
3068                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3069
3070                         /* pop result */
3071                         x86_fstp (code, 1);
3072
3073                         x86_pop_reg (code, X86_EAX);
3074                         break;
3075                 }
3076                 case OP_FCOMPARE:
3077                         if (cfg->opt & MONO_OPT_FCMOV) {
3078                                 x86_fcomip (code, 1);
3079                                 x86_fstp (code, 0);
3080                                 break;
3081                         }
3082                         /* this overwrites EAX */
3083                         EMIT_FPCOMPARE(code);
3084                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3085                         break;
3086                 case OP_FCEQ:
3087                         if (cfg->opt & MONO_OPT_FCMOV) {
3088                                 /* zeroing the register at the start results in 
3089                                  * shorter and faster code (we can also remove the widening op)
3090                                  */
3091                                 guchar *unordered_check;
3092                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3093                                 x86_fcomip (code, 1);
3094                                 x86_fstp (code, 0);
3095                                 unordered_check = code;
3096                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3097                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3098                                 x86_patch (unordered_check, code);
3099                                 break;
3100                         }
3101                         if (ins->dreg != X86_EAX) 
3102                                 x86_push_reg (code, X86_EAX);
3103
3104                         EMIT_FPCOMPARE(code);
3105                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3106                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3107                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3108                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3109
3110                         if (ins->dreg != X86_EAX) 
3111                                 x86_pop_reg (code, X86_EAX);
3112                         break;
3113                 case OP_FCLT:
3114                 case OP_FCLT_UN:
3115                         if (cfg->opt & MONO_OPT_FCMOV) {
3116                                 /* zeroing the register at the start results in 
3117                                  * shorter and faster code (we can also remove the widening op)
3118                                  */
3119                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3120                                 x86_fcomip (code, 1);
3121                                 x86_fstp (code, 0);
3122                                 if (ins->opcode == OP_FCLT_UN) {
3123                                         guchar *unordered_check = code;
3124                                         guchar *jump_to_end;
3125                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3126                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3127                                         jump_to_end = code;
3128                                         x86_jump8 (code, 0);
3129                                         x86_patch (unordered_check, code);
3130                                         x86_inc_reg (code, ins->dreg);
3131                                         x86_patch (jump_to_end, code);
3132                                 } else {
3133                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3134                                 }
3135                                 break;
3136                         }
3137                         if (ins->dreg != X86_EAX) 
3138                                 x86_push_reg (code, X86_EAX);
3139
3140                         EMIT_FPCOMPARE(code);
3141                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3142                         if (ins->opcode == OP_FCLT_UN) {
3143                                 guchar *is_not_zero_check, *end_jump;
3144                                 is_not_zero_check = code;
3145                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3146                                 end_jump = code;
3147                                 x86_jump8 (code, 0);
3148                                 x86_patch (is_not_zero_check, code);
3149                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3150
3151                                 x86_patch (end_jump, code);
3152                         }
3153                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3154                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3155
3156                         if (ins->dreg != X86_EAX) 
3157                                 x86_pop_reg (code, X86_EAX);
3158                         break;
3159                 case OP_FCGT:
3160                 case OP_FCGT_UN:
3161                         if (cfg->opt & MONO_OPT_FCMOV) {
3162                                 /* zeroing the register at the start results in 
3163                                  * shorter and faster code (we can also remove the widening op)
3164                                  */
3165                                 guchar *unordered_check;
3166                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3167                                 x86_fcomip (code, 1);
3168                                 x86_fstp (code, 0);
3169                                 if (ins->opcode == OP_FCGT) {
3170                                         unordered_check = code;
3171                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3172                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3173                                         x86_patch (unordered_check, code);
3174                                 } else {
3175                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3176                                 }
3177                                 break;
3178                         }
3179                         if (ins->dreg != X86_EAX) 
3180                                 x86_push_reg (code, X86_EAX);
3181
3182                         EMIT_FPCOMPARE(code);
3183                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3184                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3185                         if (ins->opcode == OP_FCGT_UN) {
3186                                 guchar *is_not_zero_check, *end_jump;
3187                                 is_not_zero_check = code;
3188                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3189                                 end_jump = code;
3190                                 x86_jump8 (code, 0);
3191                                 x86_patch (is_not_zero_check, code);
3192                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3193
3194                                 x86_patch (end_jump, code);
3195                         }
3196                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3197                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3198
3199                         if (ins->dreg != X86_EAX) 
3200                                 x86_pop_reg (code, X86_EAX);
3201                         break;
3202                 case OP_FBEQ:
3203                         if (cfg->opt & MONO_OPT_FCMOV) {
3204                                 guchar *jump = code;
3205                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3206                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3207                                 x86_patch (jump, code);
3208                                 break;
3209                         }
3210                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3211                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3212                         break;
3213                 case OP_FBNE_UN:
3214                         /* Branch if C013 != 100 */
3215                         if (cfg->opt & MONO_OPT_FCMOV) {
3216                                 /* branch if !ZF or (PF|CF) */
3217                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3218                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3219                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3220                                 break;
3221                         }
3222                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3223                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3224                         break;
3225                 case OP_FBLT:
3226                         if (cfg->opt & MONO_OPT_FCMOV) {
3227                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3228                                 break;
3229                         }
3230                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3231                         break;
3232                 case OP_FBLT_UN:
3233                         if (cfg->opt & MONO_OPT_FCMOV) {
3234                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3235                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3236                                 break;
3237                         }
3238                         if (ins->opcode == OP_FBLT_UN) {
3239                                 guchar *is_not_zero_check, *end_jump;
3240                                 is_not_zero_check = code;
3241                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3242                                 end_jump = code;
3243                                 x86_jump8 (code, 0);
3244                                 x86_patch (is_not_zero_check, code);
3245                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3246
3247                                 x86_patch (end_jump, code);
3248                         }
3249                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3250                         break;
3251                 case OP_FBGT:
3252                 case OP_FBGT_UN:
3253                         if (cfg->opt & MONO_OPT_FCMOV) {
3254                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3255                                 break;
3256                         }
3257                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3258                         if (ins->opcode == OP_FBGT_UN) {
3259                                 guchar *is_not_zero_check, *end_jump;
3260                                 is_not_zero_check = code;
3261                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3262                                 end_jump = code;
3263                                 x86_jump8 (code, 0);
3264                                 x86_patch (is_not_zero_check, code);
3265                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3266
3267                                 x86_patch (end_jump, code);
3268                         }
3269                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3270                         break;
3271                 case OP_FBGE:
3272                         /* Branch if C013 == 100 or 001 */
3273                         if (cfg->opt & MONO_OPT_FCMOV) {
3274                                 guchar *br1;
3275
3276                                 /* skip branch if C1=1 */
3277                                 br1 = code;
3278                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3279                                 /* branch if (C0 | C3) = 1 */
3280                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3281                                 x86_patch (br1, code);
3282                                 break;
3283                         }
3284                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3285                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3286                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3287                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3288                         break;
3289                 case OP_FBGE_UN:
3290                         /* Branch if C013 == 000 */
3291                         if (cfg->opt & MONO_OPT_FCMOV) {
3292                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3293                                 break;
3294                         }
3295                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3296                         break;
3297                 case OP_FBLE:
3298                         /* Branch if C013=000 or 100 */
3299                         if (cfg->opt & MONO_OPT_FCMOV) {
3300                                 guchar *br1;
3301
3302                                 /* skip branch if C1=1 */
3303                                 br1 = code;
3304                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3305                                 /* branch if C0=0 */
3306                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3307                                 x86_patch (br1, code);
3308                                 break;
3309                         }
3310                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3311                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3312                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3313                         break;
3314                 case OP_FBLE_UN:
3315                         /* Branch if C013 != 001 */
3316                         if (cfg->opt & MONO_OPT_FCMOV) {
3317                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3318                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3319                                 break;
3320                         }
3321                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3322                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3323                         break;
3324                 case CEE_CKFINITE: {
3325                         x86_push_reg (code, X86_EAX);
3326                         x86_fxam (code);
3327                         x86_fnstsw (code);
3328                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3329                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3330                         x86_pop_reg (code, X86_EAX);
3331                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3332                         break;
3333                 }
3334                 
3335                 case OP_X86_TLS_GET: {
3336                         x86_prefix (code, X86_GS_PREFIX);
3337                         x86_mov_reg_mem (code, ins->dreg, ins->inst_offset, 4);                 
3338                         break;
3339                 }
3340                 default:
3341                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3342                         g_assert_not_reached ();
3343                 }
3344
3345                 if ((code - cfg->native_code - offset) > max_len) {
3346                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3347                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3348                         g_assert_not_reached ();
3349                 }
3350                
3351                 cpos += max_len;
3352
3353                 last_ins = ins;
3354                 last_offset = offset;
3355                 
3356                 ins = ins->next;
3357         }
3358
3359         cfg->code_len = code - cfg->native_code;
3360 }
3361
/*
 * mono_arch_register_lowlevel_calls:
 *
 * Empty in this backend: the function takes no arguments and performs
 * no work.
 */
void
mono_arch_register_lowlevel_calls (void)
{
        /* nothing to do */
}
3366
3367 void
3368 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3369 {
3370         MonoJumpInfo *patch_info;
3371
3372         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3373                 unsigned char *ip = patch_info->ip.i + code;
3374                 const unsigned char *target;
3375
3376                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3377
3378                 switch (patch_info->type) {
3379                 case MONO_PATCH_INFO_IP:
3380                         *((gconstpointer *)(ip)) = target;
3381                         continue;
3382                 case MONO_PATCH_INFO_METHOD_REL:
3383                         *((gconstpointer *)(ip)) = target;
3384                         continue;
3385                 case MONO_PATCH_INFO_SWITCH: {
3386                         *((gconstpointer *)(ip + 2)) = target;
3387                         /* we put into the table the absolute address, no need for x86_patch in this case */
3388                         continue;
3389                 }
3390                 case MONO_PATCH_INFO_IID:
3391                         *((guint32 *)(ip + 1)) = (guint32)target;
3392                         continue;                       
3393                 case MONO_PATCH_INFO_CLASS_INIT: {
3394                         guint8 *code = ip;
3395                         /* Might already been changed to a nop */
3396                         x86_call_code (code, 0);
3397                         break;
3398                 }
3399                 case MONO_PATCH_INFO_R4:
3400                 case MONO_PATCH_INFO_R8:
3401                         *((gconstpointer *)(ip + 2)) = target;
3402                         continue;
3403                 case MONO_PATCH_INFO_METHODCONST:
3404                 case MONO_PATCH_INFO_CLASS:
3405                 case MONO_PATCH_INFO_IMAGE:
3406                 case MONO_PATCH_INFO_FIELD:
3407                 case MONO_PATCH_INFO_VTABLE:
3408                 case MONO_PATCH_INFO_SFLDA:
3409                 case MONO_PATCH_INFO_EXC_NAME:
3410                 case MONO_PATCH_INFO_LDSTR:
3411                 case MONO_PATCH_INFO_TYPE_FROM_HANDLE:
3412                 case MONO_PATCH_INFO_LDTOKEN:
3413                         *((gconstpointer *)(ip + 1)) = target;
3414                         continue;
3415                 default:
3416                         break;
3417                 }
3418                 x86_patch (ip, target);
3419         }
3420 }
3421
3422 int
3423 mono_arch_max_epilog_size (MonoCompile *cfg)
3424 {
3425         int exc_count = 0, max_epilog_size = 16;
3426         MonoJumpInfo *patch_info;
3427         
3428         if (cfg->method->save_lmf)
3429                 max_epilog_size += 128;
3430         
3431         if (mono_jit_trace_calls != NULL)
3432                 max_epilog_size += 50;
3433
3434         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3435                 max_epilog_size += 50;
3436
3437         /* count the number of exception infos */
3438      
3439         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3440                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3441                         exc_count++;
3442         }
3443
3444         /* 
3445          * make sure we have enough space for exceptions
3446          * 16 is the size of two push_imm instructions and a call
3447          */
3448         max_epilog_size += exc_count*16;
3449
3450         return max_epilog_size;
3451 }
3452
3453 guint8 *
3454 mono_arch_emit_prolog (MonoCompile *cfg)
3455 {
3456         MonoMethod *method = cfg->method;
3457         MonoBasicBlock *bb;
3458         MonoMethodSignature *sig;
3459         MonoInst *inst;
3460         int alloc_size, pos, max_offset, i;
3461         guint8 *code;
3462
3463         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3464         code = cfg->native_code = g_malloc (cfg->code_size);
3465
3466         x86_push_reg (code, X86_EBP);
3467         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3468
3469         alloc_size = - cfg->stack_offset;
3470         pos = 0;
3471
3472         if (method->save_lmf) {
3473                 pos += sizeof (MonoLMF);
3474
3475                 /* save the current IP */
3476                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3477                 x86_push_imm_template (code);
3478
3479                 /* save all caller saved regs */
3480                 x86_push_reg (code, X86_EBP);
3481                 x86_push_reg (code, X86_ESI);
3482                 x86_push_reg (code, X86_EDI);
3483                 x86_push_reg (code, X86_EBX);
3484
3485                 /* save method info */
3486                 x86_push_imm (code, method);
3487
3488                 /* get the address of lmf for the current thread */
3489                 /* 
3490                  * This is performance critical so we try to use some tricks to make
3491                  * it fast.
3492                  */
3493                 if (lmf_tls_offset != -1) {
3494                         /* Load lmf quicky using the GS register */
3495                         x86_prefix (code, X86_GS_PREFIX);
3496                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3497                 }
3498                 else {
3499                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3500                                                                  (gpointer)"mono_get_lmf_addr");
3501                         x86_call_code (code, 0);
3502                 }
3503
3504                 /* push lmf */
3505                 x86_push_reg (code, X86_EAX); 
3506                 /* push *lfm (previous_lmf) */
3507                 x86_push_membase (code, X86_EAX, 0);
3508                 /* *(lmf) = ESP */
3509                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3510         } else {
3511
3512                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3513                         x86_push_reg (code, X86_EBX);
3514                         pos += 4;
3515                 }
3516
3517                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3518                         x86_push_reg (code, X86_EDI);
3519                         pos += 4;
3520                 }
3521
3522                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3523                         x86_push_reg (code, X86_ESI);
3524                         pos += 4;
3525                 }
3526         }
3527
3528         alloc_size -= pos;
3529
3530         if (alloc_size) {
3531                 /* See mono_emit_stack_alloc */
3532 #ifdef PLATFORM_WIN32
3533                 guint32 remaining_size = alloc_size;
3534                 while (remaining_size >= 0x1000) {
3535                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3536                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3537                         remaining_size -= 0x1000;
3538                 }
3539                 if (remaining_size)
3540                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3541 #else
3542                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3543 #endif
3544         }
3545
3546         /* compute max_offset in order to use short forward jumps */
3547         max_offset = 0;
3548         if (cfg->opt & MONO_OPT_BRANCH) {
3549                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3550                         MonoInst *ins = bb->code;
3551                         bb->max_offset = max_offset;
3552
3553                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3554                                 max_offset += 6;
3555                         /* max alignment for loops */
3556                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3557                                 max_offset += LOOP_ALIGNMENT;
3558
3559                         while (ins) {
3560                                 if (ins->opcode == OP_LABEL)
3561                                         ins->inst_c1 = max_offset;
3562                                 
3563                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3564                                 ins = ins->next;
3565                         }
3566                 }
3567         }
3568
3569         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3570                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3571
3572         /* load arguments allocated to register from the stack */
3573         sig = method->signature;
3574         pos = 0;
3575
3576         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3577                 inst = cfg->varinfo [pos];
3578                 if (inst->opcode == OP_REGVAR) {
3579                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3580                         if (cfg->verbose_level > 2)
3581                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3582                 }
3583                 pos++;
3584         }
3585
3586         cfg->code_len = code - cfg->native_code;
3587
3588         return code;
3589 }
3590
3591 void
3592 mono_arch_emit_epilog (MonoCompile *cfg)
3593 {
3594         MonoJumpInfo *patch_info;
3595         MonoMethod *method = cfg->method;
3596         MonoMethodSignature *sig = method->signature;
3597         int pos;
3598         guint32 stack_to_pop;
3599         guint8 *code;
3600
3601         code = cfg->native_code + cfg->code_len;
3602
3603         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3604                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3605
3606         /* the code restoring the registers must be kept in sync with CEE_JMP */
3607         pos = 0;
3608         
3609         if (method->save_lmf) {
3610                 gint32 prev_lmf_reg;
3611
3612                 /* Find a spare register */
3613                 switch (sig->ret->type) {
3614                 case MONO_TYPE_I8:
3615                 case MONO_TYPE_U8:
3616                         prev_lmf_reg = X86_EDI;
3617                         cfg->used_int_regs |= (1 << X86_EDI);
3618                         break;
3619                 default:
3620                         prev_lmf_reg = X86_EDX;
3621                         break;
3622                 }
3623
3624                 /* reg = previous_lmf */
3625                 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);
3626
3627                 /* ecx = lmf */
3628                 x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);
3629
3630                 /* *(lmf) = previous_lmf */
3631                 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3632
3633                 /* restore caller saved regs */
3634                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3635                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
3636                 }
3637
3638                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3639                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
3640                 }
3641                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3642                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
3643                 }
3644
3645                 /* EBP is restored by LEAVE */
3646         } else {
3647                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3648                         pos -= 4;
3649                 }
3650                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3651                         pos -= 4;
3652                 }
3653                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3654                         pos -= 4;
3655                 }
3656
3657                 if (pos)
3658                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3659
3660                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3661                         x86_pop_reg (code, X86_ESI);
3662                 }
3663                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3664                         x86_pop_reg (code, X86_EDI);
3665                 }
3666                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3667                         x86_pop_reg (code, X86_EBX);
3668                 }
3669         }
3670
3671         x86_leave (code);
3672
3673         if (CALLCONV_IS_STDCALL (sig->call_convention)) {
3674                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3675
3676                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3677         } else if (MONO_TYPE_ISSTRUCT (cfg->method->signature->ret))
3678                 stack_to_pop = 4;
3679         else
3680                 stack_to_pop = 0;
3681
3682         if (stack_to_pop)
3683                 x86_ret_imm (code, stack_to_pop);
3684         else
3685                 x86_ret (code);
3686
3687         /* add code to raise exceptions */
3688         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3689                 switch (patch_info->type) {
3690                 case MONO_PATCH_INFO_EXC:
3691                         x86_patch (patch_info->ip.i + cfg->native_code, code);
3692                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);
3693                         x86_push_imm (code, patch_info->data.target);
3694                         mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_METHOD_REL, (gpointer)patch_info->ip.i);
3695                         x86_push_imm (code, patch_info->ip.i + cfg->native_code);
3696                         patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
3697                         patch_info->data.name = "mono_arch_throw_exception_by_name";
3698                         patch_info->ip.i = code - cfg->native_code;
3699                         x86_jump_code (code, 0);
3700                         break;
3701                 default:
3702                         /* do nothing */
3703                         break;
3704                 }
3705         }
3706
3707         cfg->code_len = code - cfg->native_code;
3708
3709         g_assert (cfg->code_len < cfg->code_size);
3710
3711 }
3712
3713 void
3714 mono_arch_flush_icache (guint8 *code, gint size)
3715 {
3716         /* not needed */
3717 }
3718
/*
 * mono_arch_flush_register_windows:
 *
 * Empty in this backend: the function performs no work.
 */
void
mono_arch_flush_register_windows (void)
{
        /* nothing to do */
}
3723
3724 /*
3725  * Support for fast access to the thread-local lmf structure using the GS
3726  * segment register on NPTL + kernel 2.6.x.
3727  */
3728
/*
 * Set to TRUE by the first call to mono_arch_setup_jit_tls_data (); the TLS
 * offsets are only probed while this flag is still FALSE.
 */
3729 static gboolean tls_offset_inited = FALSE;
3730
/*
 * load_unaligned_int:
 * @bytes: address of the first byte of the value
 *
 * Reads an int stored at an arbitrary byte position. The value is
 * copied with memcpy because an instruction operand is not guaranteed
 * to be aligned for int, and reading it through a casted pointer would
 * be a misaligned, aliasing-violating access.
 */
static int
load_unaligned_int (const unsigned char *bytes)
{
        int value;

        memcpy (&value, bytes, sizeof (value));
        return value;
}

/*
 * read_tls_offset_from_method:
 * @method: address of the machine code of a function whose body should
 * simply be "return <tls var>;"
 *
 * Determines the offset of the variable inside the TLS structures by
 * matching the first bytes of the function against the instruction
 * sequences listed below.
 *
 * Returns: the offset encoded in the matched sequence, or -1 when none
 * of the known sequences matches.
 */
static int read_tls_offset_from_method (void* method)
{
        const unsigned char *code = (const unsigned char *) method;

        /* gcc-3.3.2
         *
         * push ebp
         * mov ebp, esp
         * mov eax, gs:0
         * mov eax, DWORD PTR [eax+<offset>]
         */
        if (
                (code [0] == 0x55) && (code [1] == 0x89) && (code [2] == 0xe5) &&
                (code [3] == 0x65) && (code [4] == 0xa1) && (code [5] == 0x00) &&
                (code [6] == 0x00) && (code [7] == 0x00) && (code [8] == 0x00) &&
                (code [9] == 0x8b) && (code [10] == 0x80)) {
                return load_unaligned_int (&code [11]);
        }

        /* gcc-3.4
         *
         * push ebp
         * mov ebp, esp
         * mov eax, gs:<offset>
         *
         * This must be tested after the gcc-3.3.2 sequence, which starts
         * with the same five bytes.
         */
        if (
                (code [0] == 0x55) && (code [1] == 0x89) && (code [2] == 0xe5) &&
                (code [3] == 0x65) && (code [4] == 0xa1)) {
                return load_unaligned_int (&code [5]);
        }

        /* 3.2.2 with -march=athlon
         *
         * push ebp
         * mov eax, gs:<offset>
         * mov ebp, esp
         */
        if (
                (code [0] == 0x55) && (code [1] == 0x65) && (code [2] == 0xa1)) {
                return load_unaligned_int (&code [3]);
        }

        return -1;
}
3780 void
3781 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
3782 {
3783 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
3784         pthread_t self = pthread_self();
3785         pthread_attr_t attr;
3786         void *staddr = NULL;
3787         size_t stsize = 0;
3788         struct sigaltstack sa;
3789 #endif
3790
3791         if (!tls_offset_inited) {
3792                 tls_offset_inited = TRUE;
3793                 if (getenv ("MONO_NPTL")) {
3794                         lmf_tls_offset = read_tls_offset_from_method (mono_get_lmf_addr);
3795                         appdomain_tls_offset = read_tls_offset_from_method (mono_domain_get);
3796                         thread_tls_offset = read_tls_offset_from_method (mono_thread_current);
3797                 }
3798         }               
3799
3800 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
3801
3802         /* Determine stack boundaries */
3803         if (!mono_running_on_valgrind ()) {
3804 #ifdef HAVE_PTHREAD_GETATTR_NP
3805                 pthread_getattr_np( self, &attr );
3806 #else
3807 #ifdef HAVE_PTHREAD_ATTR_GET_NP
3808                 pthread_attr_get_np( self, &attr );
3809 #elif defined(sun)
3810                 pthread_attr_init( &attr );
3811                 pthread_attr_getstacksize( &attr, &stsize );
3812 #else
3813 #error "Not implemented"
3814 #endif
3815 #endif
3816 #ifndef sun
3817                 pthread_attr_getstack( &attr, &staddr, &stsize );
3818 #endif
3819         }
3820
3821         /* 
3822          * staddr seems to be wrong for the main thread, so we keep the value in
3823          * tls->end_of_stack
3824          */
3825         tls->stack_size = stsize;
3826
3827         /* Setup an alternate signal stack */
3828         tls->signal_stack = g_malloc (SIGNAL_STACK_SIZE);
3829         tls->signal_stack_size = SIGNAL_STACK_SIZE;
3830
3831         sa.ss_sp = tls->signal_stack;
3832         sa.ss_size = SIGNAL_STACK_SIZE;
3833         sa.ss_flags = SS_ONSTACK;
3834         sigaltstack (&sa, NULL);
3835 #endif
3836 }
3837
3838 void
3839 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
3840 {
3841 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
3842         struct sigaltstack sa;
3843
3844         sa.ss_sp = tls->signal_stack;
3845         sa.ss_size = SIGNAL_STACK_SIZE;
3846         sa.ss_flags = SS_DISABLE;
3847         sigaltstack  (&sa, NULL);
3848
3849         if (tls->signal_stack)
3850                 g_free (tls->signal_stack);
3851 #endif
3852 }
3853
3854 void
3855 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3856 {
3857
3858         /* add the this argument */
3859         if (this_reg != -1) {
3860                 MonoInst *this;
3861                 MONO_INST_NEW (cfg, this, OP_OUTARG);
3862                 this->type = this_type;
3863                 this->sreg1 = this_reg;
3864                 mono_bblock_add_inst (cfg->cbb, this);
3865         }
3866
3867         if (vt_reg != -1) {
3868                 MonoInst *vtarg;
3869                 MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3870                 vtarg->type = STACK_MP;
3871                 vtarg->sreg1 = vt_reg;
3872                 mono_bblock_add_inst (cfg->cbb, vtarg);
3873         }
3874 }
3875
3876 void
3877 mono_arch_emit_generic_call (MonoCompile *cfg, int slot_reg, int offset, MonoMethod *method)
3878 {
3879         MonoInst *inst;
3880         MonoCallInst *call;
3881
3882         g_assert (method->signature->is_inflated);
3883
3884         MONO_INST_NEW (cfg, inst, OP_X86_PUSH_IMM);
3885         inst->inst_imm = ((MonoMethodInflated *) method)->context;
3886         mono_bblock_add_inst (cfg->cbb, inst);
3887
3888         MONO_INST_NEW (cfg, inst, OP_X86_PUSH_MEMBASE);
3889         inst->inst_offset = offset;
3890         mono_bblock_add_inst (cfg->cbb, inst);
3891
3892         MONO_INST_NEW_CALL (cfg, call, CEE_CALL);
3893         call->inst.dreg = slot_reg;
3894         call->fptr = mini_compile_generic_method;
3895         mono_bblock_add_inst (cfg->cbb, call);
3896
3897         MONO_INST_NEW (cfg, inst, OP_ADD_IMM);
3898         inst->sreg1 = X86_ESP;
3899         inst->dreg = X86_ESP;
3900         inst->inst_imm = 8;
3901         mono_bblock_add_inst (cfg->cbb, inst);
3902 }
3903
3904 gint
3905 mono_arch_get_opcode_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3906 {
3907         if (cmethod->klass == mono_defaults.math_class) {
3908                 if (strcmp (cmethod->name, "Sin") == 0)
3909                         return OP_SIN;
3910                 else if (strcmp (cmethod->name, "Cos") == 0)
3911                         return OP_COS;
3912                 else if (strcmp (cmethod->name, "Tan") == 0)
3913                         return OP_TAN;
3914                 else if (strcmp (cmethod->name, "Atan") == 0)
3915                         return OP_ATAN;
3916                 else if (strcmp (cmethod->name, "Sqrt") == 0)
3917                         return OP_SQRT;
3918                 else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8)
3919                         return OP_ABS;
3920 #if 0
3921                 /* OP_FREM is not IEEE compatible */
3922                 else if (strcmp (cmethod->name, "IEEERemainder") == 0)
3923                         return OP_FREM;
3924 #endif
3925                 else
3926                         return -1;
3927         } else {
3928                 return -1;
3929         }
3930         return -1;
3931 }
3932
3933
3934 gboolean
3935 mono_arch_print_tree (MonoInst *tree, int arity)
3936 {
3937         return 0;
3938 }
3939
3940 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3941 {
3942         MonoInst* ins;
3943         
3944         if (appdomain_tls_offset == -1)
3945                 return NULL;
3946         
3947         MONO_INST_NEW (cfg, ins, OP_X86_TLS_GET);
3948         ins->inst_offset = appdomain_tls_offset;
3949         return ins;
3950 }
3951
3952 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3953 {
3954         MonoInst* ins;
3955         
3956         if (thread_tls_offset == -1)
3957                 return NULL;
3958         
3959         MONO_INST_NEW (cfg, ins, OP_X86_TLS_GET);
3960         ins->inst_offset = thread_tls_offset;
3961         return ins;
3962 }