2005-01-06 Zoltan Varga <vargaz@freemail.hu>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #include <mono/metadata/appdomain.h>
16 #include <mono/metadata/debug-helpers.h>
17 #include <mono/metadata/threads.h>
18 #include <mono/metadata/profiler-private.h>
19 #include <mono/utils/mono-math.h>
20
21 #include "trace.h"
22 #include "mini-x86.h"
23 #include "inssel.h"
24 #include "cpu-pentium.h"
25
26 static gint lmf_tls_offset = -1;
27 static gint appdomain_tls_offset = -1;
28 static gint thread_tls_offset = -1;
29
30 #ifdef PLATFORM_WIN32
31 /* Under windows, the default pinvoke calling convention is stdcall */
32 #define CALLCONV_IS_STDCALL(call_conv) (((call_conv) == MONO_CALL_STDCALL) || ((call_conv) == MONO_CALL_DEFAULT))
33 #else
34 #define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
35 #endif
36
37 #define SIGNAL_STACK_SIZE (64 * 1024)
38
39 const char*
40 mono_arch_regname (int reg) {
41         switch (reg) {
42         case X86_EAX: return "%eax";
43         case X86_EBX: return "%ebx";
44         case X86_ECX: return "%ecx";
45         case X86_EDX: return "%edx";
46         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
47         case X86_EDI: return "%edi";
48         case X86_ESI: return "%esi";
49         }
50         return "unknown";
51 }
52
53 /*
54  * mono_arch_get_argument_info:
55  * @csig:  a method signature
56  * @param_count: the number of parameters to consider
57  * @arg_info: an array to store the result infos
58  *
59  * Gathers information on parameters such as size, alignment and
60  * padding. arg_info should be large enought to hold param_count + 1 entries. 
61  *
62  * Returns the size of the activation frame.
63  */
64 int
65 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
66 {
67         int k, frame_size = 0;
68         int size, align, pad;
69         int offset = 8;
70
71         if (MONO_TYPE_ISSTRUCT (csig->ret)) { 
72                 frame_size += sizeof (gpointer);
73                 offset += 4;
74         }
75
76         arg_info [0].offset = offset;
77
78         if (csig->hasthis) {
79                 frame_size += sizeof (gpointer);
80                 offset += 4;
81         }
82
83         arg_info [0].size = frame_size;
84
85         for (k = 0; k < param_count; k++) {
86                 
87                 if (csig->pinvoke)
88                         size = mono_type_native_stack_size (csig->params [k], &align);
89                 else
90                         size = mono_type_stack_size (csig->params [k], &align);
91
92                 /* ignore alignment for now */
93                 align = 1;
94
95                 frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1); 
96                 arg_info [k].pad = pad;
97                 frame_size += size;
98                 arg_info [k + 1].pad = 0;
99                 arg_info [k + 1].size = size;
100                 offset += pad;
101                 arg_info [k + 1].offset = offset;
102                 offset += size;
103         }
104
105         align = MONO_ARCH_FRAME_ALIGNMENT;
106         frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
107         arg_info [k].pad = pad;
108
109         return frame_size;
110 }
111
112 static const guchar cpuid_impl [] = {
113         0x55,                           /* push   %ebp */
114         0x89, 0xe5,                     /* mov    %esp,%ebp */
115         0x53,                           /* push   %ebx */
116         0x8b, 0x45, 0x08,               /* mov    0x8(%ebp),%eax */
117         0x0f, 0xa2,                     /* cpuid   */
118         0x50,                           /* push   %eax */
119         0x8b, 0x45, 0x10,               /* mov    0x10(%ebp),%eax */
120         0x89, 0x18,                     /* mov    %ebx,(%eax) */
121         0x8b, 0x45, 0x14,               /* mov    0x14(%ebp),%eax */
122         0x89, 0x08,                     /* mov    %ecx,(%eax) */
123         0x8b, 0x45, 0x18,               /* mov    0x18(%ebp),%eax */
124         0x89, 0x10,                     /* mov    %edx,(%eax) */
125         0x58,                           /* pop    %eax */
126         0x8b, 0x55, 0x0c,               /* mov    0xc(%ebp),%edx */
127         0x89, 0x02,                     /* mov    %eax,(%edx) */
128         0x5b,                           /* pop    %ebx */
129         0xc9,                           /* leave   */
130         0xc3,                           /* ret     */
131 };
132
133 typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
134
135 static int 
136 cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
137 {
138         int have_cpuid = 0;
139         __asm__  __volatile__ (
140                 "pushfl\n"
141                 "popl %%eax\n"
142                 "movl %%eax, %%edx\n"
143                 "xorl $0x200000, %%eax\n"
144                 "pushl %%eax\n"
145                 "popfl\n"
146                 "pushfl\n"
147                 "popl %%eax\n"
148                 "xorl %%edx, %%eax\n"
149                 "andl $0x200000, %%eax\n"
150                 "movl %%eax, %0"
151                 : "=r" (have_cpuid)
152                 :
153                 : "%eax", "%edx"
154         );
155
156         if (have_cpuid) {
157                 CpuidFunc func = (CpuidFunc)cpuid_impl;
158                 func (id, p_eax, p_ebx, p_ecx, p_edx);
159                 /*
160                  * We use this approach because of issues with gcc and pic code, see:
161                  * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
162                 __asm__ __volatile__ ("cpuid"
163                         : "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
164                         : "a" (id));
165                 */
166                 return 1;
167         }
168         return 0;
169 }
170
171 /*
172  * Initialize the cpu to execute managed code.
173  */
174 void
175 mono_arch_cpu_init (void)
176 {
177         guint16 fpcw;
178
179         /* spec compliance requires running with double precision */
180         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
181         fpcw &= ~X86_FPCW_PRECC_MASK;
182         fpcw |= X86_FPCW_PREC_DOUBLE;
183         __asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
184         __asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
185
186 }
187
188 /*
189  * This function returns the optimizations supported on this cpu.
190  */
191 guint32
192 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
193 {
194         int eax, ebx, ecx, edx;
195         guint32 opts = 0;
196         
197         *exclude_mask = 0;
198         /* Feature Flags function, flags returned in EDX. */
199         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
200                 if (edx & (1 << 15)) {
201                         opts |= MONO_OPT_CMOV;
202                         if (edx & 1)
203                                 opts |= MONO_OPT_FCMOV;
204                         else
205                                 *exclude_mask |= MONO_OPT_FCMOV;
206                 } else
207                         *exclude_mask |= MONO_OPT_CMOV;
208         }
209         return opts;
210 }
211
212 /*
213  * Determine whenever the trap whose info is in SIGINFO is caused by
214  * integer overflow.
215  */
216 gboolean
217 mono_arch_is_int_overflow (void *sigctx, void *info)
218 {
219         struct sigcontext *ctx = (struct sigcontext*)sigctx;
220         guint8* ip;
221
222         ip = (guint8*)ctx->SC_EIP;
223
224         if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
225                 gint32 reg;
226
227                 /* idiv REG */
228                 switch (x86_modrm_rm (ip [1])) {
229                 case X86_ECX:
230                         reg = ctx->SC_ECX;
231                         break;
232                 case X86_EBX:
233                         reg = ctx->SC_EBX;
234                         break;
235                 default:
236                         g_assert_not_reached ();
237                         reg = -1;
238                 }
239
240                 if (reg == -1)
241                         return TRUE;
242         }
243                         
244         return FALSE;
245 }
246
247 static gboolean
248 is_regsize_var (MonoType *t) {
249         if (t->byref)
250                 return TRUE;
251         switch (mono_type_get_underlying_type (t)->type) {
252         case MONO_TYPE_I4:
253         case MONO_TYPE_U4:
254         case MONO_TYPE_I:
255         case MONO_TYPE_U:
256         case MONO_TYPE_PTR:
257                 return TRUE;
258         case MONO_TYPE_OBJECT:
259         case MONO_TYPE_STRING:
260         case MONO_TYPE_CLASS:
261         case MONO_TYPE_SZARRAY:
262         case MONO_TYPE_ARRAY:
263                 return TRUE;
264         case MONO_TYPE_VALUETYPE:
265                 return FALSE;
266         }
267         return FALSE;
268 }
269
270 GList *
271 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
272 {
273         GList *vars = NULL;
274         int i;
275
276         for (i = 0; i < cfg->num_varinfo; i++) {
277                 MonoInst *ins = cfg->varinfo [i];
278                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
279
280                 /* unused vars */
281                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
282                         continue;
283
284                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
285                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
286                         continue;
287
288                 /* we dont allocate I1 to registers because there is no simply way to sign extend 
289                  * 8bit quantities in caller saved registers on x86 */
290                 if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
291                     (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
292                     (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
293                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
294                         g_assert (i == vmv->idx);
295                         vars = g_list_prepend (vars, vmv);
296                 }
297         }
298
299         vars = mono_varlist_sort (cfg, vars, 0);
300
301         return vars;
302 }
303
304 GList *
305 mono_arch_get_global_int_regs (MonoCompile *cfg)
306 {
307         GList *regs = NULL;
308
309         /* we can use 3 registers for global allocation */
310         regs = g_list_prepend (regs, (gpointer)X86_EBX);
311         regs = g_list_prepend (regs, (gpointer)X86_ESI);
312         regs = g_list_prepend (regs, (gpointer)X86_EDI);
313
314         return regs;
315 }
316
317 /*
318  * mono_arch_regalloc_cost:
319  *
320  *  Return the cost, in number of memory references, of the action of 
321  * allocating the variable VMV into a register during global register
322  * allocation.
323  */
324 guint32
325 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
326 {
327         MonoInst *ins = cfg->varinfo [vmv->idx];
328
329         if (cfg->method->save_lmf)
330                 /* The register is already saved */
331                 return (ins->opcode == OP_ARG) ? 1 : 0;
332         else
333                 /* push+pop+possible load if it is an argument */
334                 return (ins->opcode == OP_ARG) ? 3 : 2;
335 }
336  
337 /*
338  * Set var information according to the calling convention. X86 version.
339  * The locals var stuff should most likely be split in another method.
340  */
341 void
342 mono_arch_allocate_vars (MonoCompile *m)
343 {
344         MonoMethodSignature *sig;
345         MonoMethodHeader *header;
346         MonoInst *inst;
347         int i, offset, size, align, curinst;
348
349         header = mono_method_get_header (m->method);
350
351         sig = m->method->signature;
352
353         offset = 8;
354         curinst = 0;
355         if (MONO_TYPE_ISSTRUCT (sig->ret)) {
356                 m->ret->opcode = OP_REGOFFSET;
357                 m->ret->inst_basereg = X86_EBP;
358                 m->ret->inst_offset = offset;
359                 offset += sizeof (gpointer);
360         } else {
361                 /* FIXME: handle long and FP values */
362                 switch (sig->ret->type) {
363                 case MONO_TYPE_VOID:
364                         break;
365                 default:
366                         m->ret->opcode = OP_REGVAR;
367                         m->ret->inst_c0 = X86_EAX;
368                         break;
369                 }
370         }
371         if (sig->hasthis) {
372                 inst = m->varinfo [curinst];
373                 if (inst->opcode != OP_REGVAR) {
374                         inst->opcode = OP_REGOFFSET;
375                         inst->inst_basereg = X86_EBP;
376                 }
377                 inst->inst_offset = offset;
378                 offset += sizeof (gpointer);
379                 curinst++;
380         }
381
382         if (sig->call_convention == MONO_CALL_VARARG) {
383                 m->sig_cookie = offset;
384                 offset += sizeof (gpointer);
385         }
386
387         for (i = 0; i < sig->param_count; ++i) {
388                 inst = m->varinfo [curinst];
389                 if (inst->opcode != OP_REGVAR) {
390                         inst->opcode = OP_REGOFFSET;
391                         inst->inst_basereg = X86_EBP;
392                 }
393                 inst->inst_offset = offset;
394                 size = mono_type_size (sig->params [i], &align);
395                 size += 4 - 1;
396                 size &= ~(4 - 1);
397                 offset += size;
398                 curinst++;
399         }
400
401         offset = 0;
402
403         /* reserve space to save LMF and caller saved registers */
404
405         if (m->method->save_lmf) {
406                 offset += sizeof (MonoLMF);
407         } else {
408                 if (m->used_int_regs & (1 << X86_EBX)) {
409                         offset += 4;
410                 }
411
412                 if (m->used_int_regs & (1 << X86_EDI)) {
413                         offset += 4;
414                 }
415
416                 if (m->used_int_regs & (1 << X86_ESI)) {
417                         offset += 4;
418                 }
419         }
420
421         for (i = curinst; i < m->num_varinfo; ++i) {
422                 inst = m->varinfo [i];
423
424                 if ((inst->flags & MONO_INST_IS_DEAD) || inst->opcode == OP_REGVAR)
425                         continue;
426
427                 /* inst->unused indicates native sized value types, this is used by the
428                 * pinvoke wrappers when they call functions returning structure */
429                 if (inst->unused && MONO_TYPE_ISSTRUCT (inst->inst_vtype) && inst->inst_vtype->type != MONO_TYPE_TYPEDBYREF)
430                         size = mono_class_native_size (inst->inst_vtype->data.klass, &align);
431                 else
432                         size = mono_type_size (inst->inst_vtype, &align);
433
434                 offset += size;
435                 offset += align - 1;
436                 offset &= ~(align - 1);
437                 inst->opcode = OP_REGOFFSET;
438                 inst->inst_basereg = X86_EBP;
439                 inst->inst_offset = -offset;
440                 //g_print ("allocating local %d to %d\n", i, -offset);
441         }
442         offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
443         offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);
444
445         /* change sign? */
446         m->stack_offset = -offset;
447 }
448
449 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
450  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
451  */
452
453 /* 
454  * take the arguments and generate the arch-specific
455  * instructions to properly call the function in call.
456  * This includes pushing, moving arguments to the right register
457  * etc.
458  * Issue: who does the spilling if needed, and when?
459  */
460 MonoCallInst*
461 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
462         MonoInst *arg, *in;
463         MonoMethodSignature *sig;
464         int i, n, stack_size, type;
465         MonoType *ptype;
466
467         stack_size = 0;
468         /* add the vararg cookie before the non-implicit args */
469         if (call->signature->call_convention == MONO_CALL_VARARG) {
470                 MonoInst *sig_arg;
471                 /* FIXME: Add support for signature tokens to AOT */
472                 cfg->disable_aot = TRUE;
473                 MONO_INST_NEW (cfg, arg, OP_OUTARG);
474                 MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
475                 sig_arg->inst_p0 = call->signature;
476                 arg->inst_left = sig_arg;
477                 arg->type = STACK_PTR;
478                 /* prepend, so they get reversed */
479                 arg->next = call->out_args;
480                 call->out_args = arg;
481                 stack_size += sizeof (gpointer);
482         }
483         sig = call->signature;
484         n = sig->param_count + sig->hasthis;
485
486         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
487                 stack_size += sizeof (gpointer);
488         for (i = 0; i < n; ++i) {
489                 if (is_virtual && i == 0) {
490                         /* the argument will be attached to the call instrucion */
491                         in = call->args [i];
492                         stack_size += 4;
493                 } else {
494                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
495                         in = call->args [i];
496                         arg->cil_code = in->cil_code;
497                         arg->inst_left = in;
498                         arg->type = in->type;
499                         /* prepend, so they get reversed */
500                         arg->next = call->out_args;
501                         call->out_args = arg;
502                         if (i >= sig->hasthis) {
503                                 MonoType *t = sig->params [i - sig->hasthis];
504                                 ptype = mono_type_get_underlying_type (t);
505                                 if (t->byref)
506                                         type = MONO_TYPE_U;
507                                 else
508                                         type = ptype->type;
509                                 /* FIXME: validate arguments... */
510                                 switch (type) {
511                                 case MONO_TYPE_I:
512                                 case MONO_TYPE_U:
513                                 case MONO_TYPE_BOOLEAN:
514                                 case MONO_TYPE_CHAR:
515                                 case MONO_TYPE_I1:
516                                 case MONO_TYPE_U1:
517                                 case MONO_TYPE_I2:
518                                 case MONO_TYPE_U2:
519                                 case MONO_TYPE_I4:
520                                 case MONO_TYPE_U4:
521                                 case MONO_TYPE_STRING:
522                                 case MONO_TYPE_CLASS:
523                                 case MONO_TYPE_OBJECT:
524                                 case MONO_TYPE_PTR:
525                                 case MONO_TYPE_FNPTR:
526                                 case MONO_TYPE_ARRAY:
527                                 case MONO_TYPE_SZARRAY:
528                                         stack_size += 4;
529                                         break;
530                                 case MONO_TYPE_I8:
531                                 case MONO_TYPE_U8:
532                                         stack_size += 8;
533                                         break;
534                                 case MONO_TYPE_R4:
535                                         stack_size += 4;
536                                         arg->opcode = OP_OUTARG_R4;
537                                         break;
538                                 case MONO_TYPE_R8:
539                                         stack_size += 8;
540                                         arg->opcode = OP_OUTARG_R8;
541                                         break;
542                                 case MONO_TYPE_VALUETYPE: {
543                                         int size;
544                                         if (sig->pinvoke) 
545                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, NULL);
546                                         else 
547                                                 size = mono_type_stack_size (&in->klass->byval_arg, NULL);
548
549                                         stack_size += size;
550                                         arg->opcode = OP_OUTARG_VT;
551                                         arg->klass = in->klass;
552                                         arg->unused = sig->pinvoke;
553                                         arg->inst_imm = size; 
554                                         break;
555                                 }
556                                 case MONO_TYPE_TYPEDBYREF:
557                                         stack_size += sizeof (MonoTypedRef);
558                                         arg->opcode = OP_OUTARG_VT;
559                                         arg->klass = in->klass;
560                                         arg->unused = sig->pinvoke;
561                                         arg->inst_imm = sizeof (MonoTypedRef); 
562                                         break;
563                                 default:
564                                         g_error ("unknown type 0x%02x in mono_arch_call_opcode\n", type);
565                                 }
566                         } else {
567                                 /* the this argument */
568                                 stack_size += 4;
569                         }
570                 }
571         }
572         /* if the function returns a struct, the called method already does a ret $0x4 */
573         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
574                 stack_size -= 4;
575         call->stack_usage = stack_size;
576         /* 
577          * should set more info in call, such as the stack space
578          * used by the args that needs to be added back to esp
579          */
580
581         return call;
582 }
583
584 /*
585  * Allow tracing to work with this interface (with an optional argument)
586  */
587 void*
588 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
589 {
590         guchar *code = p;
591
592         /* if some args are passed in registers, we need to save them here */
593         x86_push_reg (code, X86_EBP);
594
595         if (cfg->compile_aot) {
596                 x86_push_imm (code, cfg->method);
597                 x86_mov_reg_imm (code, X86_EAX, func);
598                 x86_call_reg (code, X86_EAX);
599         } else {
600                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
601                 x86_push_imm (code, cfg->method);
602                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
603                 x86_call_code (code, 0);
604         }
605         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
606
607         return code;
608 }
609
/*
 * How mono_arch_instrument_epilog () preserves the return value of the
 * method around the call to the instrumentation function.
 */
enum {
	SAVE_NONE    = 0,	/* nothing to preserve */
	SAVE_STRUCT  = 1,	/* valuetype return */
	SAVE_EAX     = 2,	/* value in EAX */
	SAVE_EAX_EDX = 3,	/* 64 bit value in EDX:EAX */
	SAVE_FP      = 4	/* value on the fp stack */
};
617
618 void*
619 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
620 {
621         guchar *code = p;
622         int arg_size = 0, save_mode = SAVE_NONE;
623         MonoMethod *method = cfg->method;
624         
625         switch (mono_type_get_underlying_type (method->signature->ret)->type) {
626         case MONO_TYPE_VOID:
627                 /* special case string .ctor icall */
628                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
629                         save_mode = SAVE_EAX;
630                 else
631                         save_mode = SAVE_NONE;
632                 break;
633         case MONO_TYPE_I8:
634         case MONO_TYPE_U8:
635                 save_mode = SAVE_EAX_EDX;
636                 break;
637         case MONO_TYPE_R4:
638         case MONO_TYPE_R8:
639                 save_mode = SAVE_FP;
640                 break;
641         case MONO_TYPE_VALUETYPE:
642                 save_mode = SAVE_STRUCT;
643                 break;
644         default:
645                 save_mode = SAVE_EAX;
646                 break;
647         }
648
649         switch (save_mode) {
650         case SAVE_EAX_EDX:
651                 x86_push_reg (code, X86_EDX);
652                 x86_push_reg (code, X86_EAX);
653                 if (enable_arguments) {
654                         x86_push_reg (code, X86_EDX);
655                         x86_push_reg (code, X86_EAX);
656                         arg_size = 8;
657                 }
658                 break;
659         case SAVE_EAX:
660                 x86_push_reg (code, X86_EAX);
661                 if (enable_arguments) {
662                         x86_push_reg (code, X86_EAX);
663                         arg_size = 4;
664                 }
665                 break;
666         case SAVE_FP:
667                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
668                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
669                 if (enable_arguments) {
670                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
671                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
672                         arg_size = 8;
673                 }
674                 break;
675         case SAVE_STRUCT:
676                 if (enable_arguments) {
677                         x86_push_membase (code, X86_EBP, 8);
678                         arg_size = 4;
679                 }
680                 break;
681         case SAVE_NONE:
682         default:
683                 break;
684         }
685
686         if (cfg->compile_aot) {
687                 x86_push_imm (code, method);
688                 x86_mov_reg_imm (code, X86_EAX, func);
689                 x86_call_reg (code, X86_EAX);
690         } else {
691                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
692                 x86_push_imm (code, method);
693                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
694                 x86_call_code (code, 0);
695         }
696         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
697
698         switch (save_mode) {
699         case SAVE_EAX_EDX:
700                 x86_pop_reg (code, X86_EAX);
701                 x86_pop_reg (code, X86_EDX);
702                 break;
703         case SAVE_EAX:
704                 x86_pop_reg (code, X86_EAX);
705                 break;
706         case SAVE_FP:
707                 x86_fld_membase (code, X86_ESP, 0, TRUE);
708                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
709                 break;
710         case SAVE_NONE:
711         default:
712                 break;
713         }
714
715         return code;
716 }
717
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit the conditional branch for INS. The target is a label
 * (ins->inst_i0) if the instruction is flagged with MONO_INST_BRLABEL,
 * otherwise the basic block ins->inst_true_bb. If the native offset of the
 * target is already set the branch goes directly there, if not, a patch is
 * recorded and a placeholder branch is emitted, which is a 8 bit one only
 * if MONO_OPT_BRANCH is enabled and the estimated distance fits into a
 * signed byte.
 * Uses 'cfg', 'code' and 'cpos' from the scope it is expanded in. The
 * expansion is a plain if/else statement, not a do { } while (0) block.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if ((ins)->flags & MONO_INST_BRLABEL) { \
	if ((ins)->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, (ins)->inst_i0); \
		if (!(cfg->opt & MONO_OPT_BRANCH) || \
		    !x86_is_imm8 ((ins)->inst_i0->inst_c1 - cpos)) \
			x86_branch32 (code, cond, 0, sign); \
		else \
			x86_branch8 (code, cond, 0, sign); \
	} \
} else { \
	if ((ins)->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + (ins)->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, (ins)->inst_true_bb); \
		if (!(cfg->opt & MONO_OPT_BRANCH) || \
		    !x86_is_imm8 ((ins)->inst_true_bb->max_offset - cpos)) \
			x86_branch32 (code, cond, 0, sign); \
		else \
			x86_branch8 (code, cond, 0, sign); \
	} \
}
742
/*
 * EMIT_COND_SYSTEM_EXCEPTION:
 *
 *   Emit a 32 bit conditional branch on COND with a placeholder target and
 * record a MONO_PATCH_INFO_EXC patch naming EXC_NAME for it.
 * Uses 'cfg' and 'code' from the scope it is expanded in.
 * NOTE: the expansion already ends with a ';', so this macro can't be used
 * as the body of an if which has an else branch.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,is_signed,exc_name) \
	do { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, exc_name); \
		x86_branch32 (code, cond, 0, is_signed); \
	} while (0); 
750
/*
 * EMIT_FPCOMPARE:
 *
 *   Emit an fcompp followed by an fnstsw.
 * NOTE: the expansion already ends with a ';'.
 */
#define EMIT_FPCOMPARE(code) do { x86_fcompp (code); x86_fnstsw (code); } while (0); 
755
756
757 static guint8*
758 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
759 {
760         if (cfg->compile_aot) {
761                 guint32 got_reg = X86_EAX;
762
763                 if (cfg->compile_aot) {          
764                         /*
765                          * Since the patches are generated by the back end, there is
766                          * no way to generate a got_var at this point.
767                          */
768                         g_assert (cfg->got_var);
769
770                         if (cfg->got_var->opcode == OP_REGOFFSET)
771                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
772                         else
773                                 got_reg = cfg->got_var->dreg;
774                 }
775
776                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
777                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
778         }
779         else {
780                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
781                 x86_call_code (code, 0);
782         }
783
784         return code;
785 }
786
/*
 * INST_IGNORES_CFLAGS:
 *
 *   True for the opcodes listed below, ie. a branch and two kinds of
 * stores. Evaluates its argument up to three times.
 * FIXME: Add more instructions
 */
#define INST_IGNORES_CFLAGS(ins) \
	(((ins)->opcode == CEE_BR) || \
	 ((ins)->opcode == OP_STORE_MEMBASE_IMM) || \
	 ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
789
790 static void
791 peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
792 {
793         MonoInst *ins, *last_ins = NULL;
794         ins = bb->code;
795
796         while (ins) {
797
798                 switch (ins->opcode) {
799                 case OP_ICONST:
800                         /* reg = 0 -> XOR (reg, reg) */
801                         /* XOR sets cflags on x86, so we cant do it always */
802                         if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
803                                 ins->opcode = CEE_XOR;
804                                 ins->sreg1 = ins->dreg;
805                                 ins->sreg2 = ins->dreg;
806                         }
807                         break;
808                 case OP_MUL_IMM: 
809                         /* remove unnecessary multiplication with 1 */
810                         if (ins->inst_imm == 1) {
811                                 if (ins->dreg != ins->sreg1) {
812                                         ins->opcode = OP_MOVE;
813                                 } else {
814                                         last_ins->next = ins->next;
815                                         ins = ins->next;
816                                         continue;
817                                 }
818                         }
819                         break;
820                 case OP_COMPARE_IMM:
821                         /* OP_COMPARE_IMM (reg, 0) 
822                          * --> 
823                          * OP_X86_TEST_NULL (reg) 
824                          */
825                         if (!ins->inst_imm)
826                                 ins->opcode = OP_X86_TEST_NULL;
827                         break;
828                 case OP_X86_COMPARE_MEMBASE_IMM:
829                         /* 
830                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
831                          * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
832                          * -->
833                          * OP_STORE_MEMBASE_REG reg, offset(basereg)
834                          * OP_COMPARE_IMM reg, imm
835                          *
836                          * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
837                          */
838                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
839                             ins->inst_basereg == last_ins->inst_destbasereg &&
840                             ins->inst_offset == last_ins->inst_offset) {
841                                         ins->opcode = OP_COMPARE_IMM;
842                                         ins->sreg1 = last_ins->sreg1;
843
844                                         /* check if we can remove cmp reg,0 with test null */
845                                         if (!ins->inst_imm)
846                                                 ins->opcode = OP_X86_TEST_NULL;
847                                 }
848
849                         break;
850                 case OP_LOAD_MEMBASE:
851                 case OP_LOADI4_MEMBASE:
852                         /* 
853                          * Note: if reg1 = reg2 the load op is removed
854                          *
855                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
856                          * OP_LOAD_MEMBASE offset(basereg), reg2
857                          * -->
858                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
859                          * OP_MOVE reg1, reg2
860                          */
861                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
862                                          || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
863                             ins->inst_basereg == last_ins->inst_destbasereg &&
864                             ins->inst_offset == last_ins->inst_offset) {
865                                 if (ins->dreg == last_ins->sreg1) {
866                                         last_ins->next = ins->next;                             
867                                         ins = ins->next;                                
868                                         continue;
869                                 } else {
870                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
871                                         ins->opcode = OP_MOVE;
872                                         ins->sreg1 = last_ins->sreg1;
873                                 }
874
875                         /* 
876                          * Note: reg1 must be different from the basereg in the second load
877                          * Note: if reg1 = reg2 is equal then second load is removed
878                          *
879                          * OP_LOAD_MEMBASE offset(basereg), reg1
880                          * OP_LOAD_MEMBASE offset(basereg), reg2
881                          * -->
882                          * OP_LOAD_MEMBASE offset(basereg), reg1
883                          * OP_MOVE reg1, reg2
884                          */
885                         } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
886                                            || last_ins->opcode == OP_LOAD_MEMBASE) &&
887                               ins->inst_basereg != last_ins->dreg &&
888                               ins->inst_basereg == last_ins->inst_basereg &&
889                               ins->inst_offset == last_ins->inst_offset) {
890
891                                 if (ins->dreg == last_ins->dreg) {
892                                         last_ins->next = ins->next;                             
893                                         ins = ins->next;                                
894                                         continue;
895                                 } else {
896                                         ins->opcode = OP_MOVE;
897                                         ins->sreg1 = last_ins->dreg;
898                                 }
899
900                                 //g_assert_not_reached ();
901
902 #if 0
903                         /* 
904                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
905                          * OP_LOAD_MEMBASE offset(basereg), reg
906                          * -->
907                          * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
908                          * OP_ICONST reg, imm
909                          */
910                         } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
911                                                 || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
912                                    ins->inst_basereg == last_ins->inst_destbasereg &&
913                                    ins->inst_offset == last_ins->inst_offset) {
914                                 //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
915                                 ins->opcode = OP_ICONST;
916                                 ins->inst_c0 = last_ins->inst_imm;
917                                 g_assert_not_reached (); // check this rule
918 #endif
919                         }
920                         break;
921                 case OP_LOADU1_MEMBASE:
922                 case OP_LOADI1_MEMBASE:
923                         /* 
924                          * Note: if reg1 = reg2 the load op is removed
925                          *
926                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
927                          * OP_LOAD_MEMBASE offset(basereg), reg2
928                          * -->
929                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
930                          * OP_MOVE reg1, reg2
931                          */
932                         if (last_ins && (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
933                                         ins->inst_basereg == last_ins->inst_destbasereg &&
934                                         ins->inst_offset == last_ins->inst_offset) {
935                                 if (ins->dreg == last_ins->sreg1) {
936                                         last_ins->next = ins->next;                             
937                                         ins = ins->next;                                
938                                         continue;
939                                 } else {
940                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
941                                         ins->opcode = OP_MOVE;
942                                         ins->sreg1 = last_ins->sreg1;
943                                 }
944                         }
945                         break;
946                 case OP_LOADU2_MEMBASE:
947                 case OP_LOADI2_MEMBASE:
948                         /* 
949                          * Note: if reg1 = reg2 the load op is removed
950                          *
951                          * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
952                          * OP_LOAD_MEMBASE offset(basereg), reg2
953                          * -->
954                          * OP_STORE_MEMBASE_REG reg1, offset(basereg)
955                          * OP_MOVE reg1, reg2
956                          */
957                         if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
958                                         ins->inst_basereg == last_ins->inst_destbasereg &&
959                                         ins->inst_offset == last_ins->inst_offset) {
960                                 if (ins->dreg == last_ins->sreg1) {
961                                         last_ins->next = ins->next;                             
962                                         ins = ins->next;                                
963                                         continue;
964                                 } else {
965                                         //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
966                                         ins->opcode = OP_MOVE;
967                                         ins->sreg1 = last_ins->sreg1;
968                                 }
969                         }
970                         break;
971                 case CEE_CONV_I4:
972                 case CEE_CONV_U4:
973                 case OP_MOVE:
974                         /*
975                          * Removes:
976                          *
977                          * OP_MOVE reg, reg 
978                          */
979                         if (ins->dreg == ins->sreg1) {
980                                 if (last_ins)
981                                         last_ins->next = ins->next;                             
982                                 ins = ins->next;
983                                 continue;
984                         }
985                         /* 
986                          * Removes:
987                          *
988                          * OP_MOVE sreg, dreg 
989                          * OP_MOVE dreg, sreg
990                          */
991                         if (last_ins && last_ins->opcode == OP_MOVE &&
992                             ins->sreg1 == last_ins->dreg &&
993                             ins->dreg == last_ins->sreg1) {
994                                 last_ins->next = ins->next;                             
995                                 ins = ins->next;                                
996                                 continue;
997                         }
998                         break;
999                         
1000                 case OP_X86_PUSH_MEMBASE:
1001                         if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
1002                                          last_ins->opcode == OP_STORE_MEMBASE_REG) &&
1003                             ins->inst_basereg == last_ins->inst_destbasereg &&
1004                             ins->inst_offset == last_ins->inst_offset) {
1005                                     ins->opcode = OP_X86_PUSH;
1006                                     ins->sreg1 = last_ins->sreg1;
1007                         }
1008                         break;
1009                 }
1010                 last_ins = ins;
1011                 ins = ins->next;
1012         }
1013         bb->last_ins = last_ins;
1014 }
1015
1016 static const int 
1017 branch_cc_table [] = {
1018         X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1019         X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
1020         X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
1021 };
1022
/*
 * DEBUG(a): runs the statement @a only when the verbose level of the
 * `cfg' variable in scope at the expansion site is above 1.
 *
 * The expansion is written as "if (!cond) { } else a" so that the hidden
 * `if' already owns an else branch: an `else' written after DEBUG (...);
 * in the calling code can no longer attach to the macro's own `if'.
 * Call sites keep the usual form: DEBUG (g_print (...));
 */
#define DEBUG(a) if (!(cfg->verbose_level > 1)) { } else a
//#define DEBUG(a)
1025
1026 /*
1027  * returns the offset used by spillvar. It allocates a new
1028  * spill variable if necessary. 
1029  */
1030 static int
1031 mono_spillvar_offset (MonoCompile *cfg, int spillvar)
1032 {
1033         MonoSpillInfo **si, *info;
1034         int i = 0;
1035
1036         si = &cfg->spill_info; 
1037         
1038         while (i <= spillvar) {
1039
1040                 if (!*si) {
1041                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1042                         info->next = NULL;
1043                         cfg->stack_offset -= sizeof (gpointer);
1044                         info->offset = cfg->stack_offset;
1045                 }
1046
1047                 if (i == spillvar)
1048                         return (*si)->offset;
1049
1050                 i++;
1051                 si = &(*si)->next;
1052         }
1053
1054         g_assert_not_reached ();
1055         return 0;
1056 }
1057
1058 /*
1059  * returns the offset used by spillvar. It allocates a new
1060  * spill float variable if necessary. 
1061  * (same as mono_spillvar_offset but for float)
1062  */
1063 static int
1064 mono_spillvar_offset_float (MonoCompile *cfg, int spillvar)
1065 {
1066         MonoSpillInfo **si, *info;
1067         int i = 0;
1068
1069         si = &cfg->spill_info_float; 
1070         
1071         while (i <= spillvar) {
1072
1073                 if (!*si) {
1074                         *si = info = mono_mempool_alloc (cfg->mempool, sizeof (MonoSpillInfo));
1075                         info->next = NULL;
1076                         cfg->stack_offset -= sizeof (double);
1077                         info->offset = cfg->stack_offset;
1078                 }
1079
1080                 if (i == spillvar)
1081                         return (*si)->offset;
1082
1083                 i++;
1084                 si = &(*si)->next;
1085         }
1086
1087         g_assert_not_reached ();
1088         return 0;
1089 }
1090
1091 /*
1092  * Creates a store for spilled floating point items
1093  */
1094 static MonoInst*
1095 create_spilled_store_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1096 {
1097         MonoInst *store;
1098         MONO_INST_NEW (cfg, store, OP_STORER8_MEMBASE_REG);
1099         store->sreg1 = reg;
1100         store->inst_destbasereg = X86_EBP;
1101         store->inst_offset = mono_spillvar_offset_float (cfg, spill);
1102
1103         DEBUG (g_print ("SPILLED FLOAT STORE (%d at 0x%08x(%%sp)) (from %d)\n", spill, store->inst_offset, reg));
1104         return store;
1105 }
1106
1107 /*
1108  * Creates a load for spilled floating point items 
1109  */
1110 static MonoInst*
1111 create_spilled_load_float (MonoCompile *cfg, int spill, int reg, MonoInst *ins)
1112 {
1113         MonoInst *load;
1114         MONO_INST_NEW (cfg, load, OP_LOADR8_SPILL_MEMBASE);
1115         load->dreg = reg;
1116         load->inst_basereg = X86_EBP;
1117         load->inst_offset = mono_spillvar_offset_float (cfg, spill);
1118
1119         DEBUG (g_print ("SPILLED FLOAT LOAD (%d at 0x%08x(%%sp)) (from %d)\n", spill, load->inst_offset, reg));
1120         return load;
1121 }
1122
/*
 * Both macros first check that @r is a hard integer register number
 * (0 <= r < MONO_MAX_IREGS) and only then test it with X86_IS_CALLEE:
 * is_global_ireg is true when that test fails, reg_is_freeable when it
 * succeeds.
 */
#define is_global_ireg(r) \
	((r) >= 0 && (r) < MONO_MAX_IREGS && !X86_IS_CALLEE ((r)))
#define reg_is_freeable(r) \
	((r) >= 0 && (r) < MONO_MAX_IREGS && X86_IS_CALLEE ((r)))
1125
/*
 * Per-register liveness record filled in by the local register allocator.
 * Positions are instruction indexes within the basic block, counted from 1,
 * so 0 means "not set".
 */
typedef struct {
	int born_in;	/* index of the instruction where the register is born */
	int killed_in;	/* NOTE(review): not written in the code visible here -- confirm usage */
	int last_use;	/* index of the most recent instruction using the register */
	int prev_use;	/* value last_use had before it was updated */
	int flags;	/* MONO_X86_* bits; used to track fp spill/load */
} RegTrack;
1133
1134 static const char*const * ins_spec = pentium_desc;
1135
1136 static void
1137 print_ins (int i, MonoInst *ins)
1138 {
1139         const char *spec = ins_spec [ins->opcode];
1140         g_print ("\t%-2d %s", i, mono_inst_name (ins->opcode));
1141         if (spec [MONO_INST_DEST]) {
1142                 if (ins->dreg >= MONO_MAX_IREGS)
1143                         g_print (" R%d <-", ins->dreg);
1144                 else
1145                         g_print (" %s <-", mono_arch_regname (ins->dreg));
1146         }
1147         if (spec [MONO_INST_SRC1]) {
1148                 if (ins->sreg1 >= MONO_MAX_IREGS)
1149                         g_print (" R%d", ins->sreg1);
1150                 else
1151                         g_print (" %s", mono_arch_regname (ins->sreg1));
1152         }
1153         if (spec [MONO_INST_SRC2]) {
1154                 if (ins->sreg2 >= MONO_MAX_IREGS)
1155                         g_print (" R%d", ins->sreg2);
1156                 else
1157                         g_print (" %s", mono_arch_regname (ins->sreg2));
1158         }
1159         if (spec [MONO_INST_CLOB])
1160                 g_print (" clobbers: %c", spec [MONO_INST_CLOB]);
1161         g_print ("\n");
1162 }
1163
1164 static void
1165 print_regtrack (RegTrack *t, int num)
1166 {
1167         int i;
1168         char buf [32];
1169         const char *r;
1170         
1171         for (i = 0; i < num; ++i) {
1172                 if (!t [i].born_in)
1173                         continue;
1174                 if (i >= MONO_MAX_IREGS) {
1175                         g_snprintf (buf, sizeof(buf), "R%d", i);
1176                         r = buf;
1177                 } else
1178                         r = mono_arch_regname (i);
1179                 g_print ("liveness: %s [%d - %d]\n", r, t [i].born_in, t[i].last_use);
1180         }
1181 }
1182
1183 typedef struct InstList InstList;
1184
1185 struct InstList {
1186         InstList *prev;
1187         InstList *next;
1188         MonoInst *data;
1189 };
1190
1191 static inline InstList*
1192 inst_list_prepend (MonoMemPool *pool, InstList *list, MonoInst *data)
1193 {
1194         InstList *item = mono_mempool_alloc (pool, sizeof (InstList));
1195         item->data = data;
1196         item->prev = NULL;
1197         item->next = list;
1198         if (list)
1199                 list->prev = item;
1200         return item;
1201 }
1202
1203 /*
1204  * Force the spilling of the variable in the symbolic register 'reg'.
1205  */
1206 static int
1207 get_register_force_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, int reg)
1208 {
1209         MonoInst *load;
1210         int i, sel, spill;
1211         
1212         sel = cfg->rs->iassign [reg];
1213         /*i = cfg->rs->isymbolic [sel];
1214         g_assert (i == reg);*/
1215         i = reg;
1216         spill = ++cfg->spill_count;
1217         cfg->rs->iassign [i] = -spill - 1;
1218         mono_regstate_free_int (cfg->rs, sel);
1219         /* we need to create a spill var and insert a load to sel after the current instruction */
1220         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1221         load->dreg = sel;
1222         load->inst_basereg = X86_EBP;
1223         load->inst_offset = mono_spillvar_offset (cfg, spill);
1224         if (item->prev) {
1225                 while (ins->next != item->prev->data)
1226                         ins = ins->next;
1227         }
1228         load->next = ins->next;
1229         ins->next = load;
1230         DEBUG (g_print ("SPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1231         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1232         g_assert (i == sel);
1233
1234         return sel;
1235 }
1236
1237 static int
1238 get_register_spilling (MonoCompile *cfg, InstList *item, MonoInst *ins, guint32 regmask, int reg)
1239 {
1240         MonoInst *load;
1241         int i, sel, spill;
1242
1243         DEBUG (g_print ("\tstart regmask to assign R%d: 0x%08x (R%d <- R%d R%d)\n", reg, regmask, ins->dreg, ins->sreg1, ins->sreg2));
1244         /* exclude the registers in the current instruction */
1245         if (reg != ins->sreg1 && (reg_is_freeable (ins->sreg1) || (ins->sreg1 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg1] >= 0))) {
1246                 if (ins->sreg1 >= MONO_MAX_IREGS)
1247                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg1]);
1248                 else
1249                         regmask &= ~ (1 << ins->sreg1);
1250                 DEBUG (g_print ("\t\texcluding sreg1 %s\n", mono_arch_regname (ins->sreg1)));
1251         }
1252         if (reg != ins->sreg2 && (reg_is_freeable (ins->sreg2) || (ins->sreg2 >= MONO_MAX_IREGS && cfg->rs->iassign [ins->sreg2] >= 0))) {
1253                 if (ins->sreg2 >= MONO_MAX_IREGS)
1254                         regmask &= ~ (1 << cfg->rs->iassign [ins->sreg2]);
1255                 else
1256                         regmask &= ~ (1 << ins->sreg2);
1257                 DEBUG (g_print ("\t\texcluding sreg2 %s %d\n", mono_arch_regname (ins->sreg2), ins->sreg2));
1258         }
1259         if (reg != ins->dreg && reg_is_freeable (ins->dreg)) {
1260                 regmask &= ~ (1 << ins->dreg);
1261                 DEBUG (g_print ("\t\texcluding dreg %s\n", mono_arch_regname (ins->dreg)));
1262         }
1263
1264         DEBUG (g_print ("\t\tavailable regmask: 0x%08x\n", regmask));
1265         g_assert (regmask); /* need at least a register we can free */
1266         sel = -1;
1267         /* we should track prev_use and spill the register that's farther */
1268         for (i = 0; i < MONO_MAX_IREGS; ++i) {
1269                 if (regmask & (1 << i)) {
1270                         sel = i;
1271                         DEBUG (g_print ("\t\tselected register %s has assignment %d\n", mono_arch_regname (sel), cfg->rs->iassign [sel]));
1272                         break;
1273                 }
1274         }
1275         i = cfg->rs->isymbolic [sel];
1276         spill = ++cfg->spill_count;
1277         cfg->rs->iassign [i] = -spill - 1;
1278         mono_regstate_free_int (cfg->rs, sel);
1279         /* we need to create a spill var and insert a load to sel after the current instruction */
1280         MONO_INST_NEW (cfg, load, OP_LOAD_MEMBASE);
1281         load->dreg = sel;
1282         load->inst_basereg = X86_EBP;
1283         load->inst_offset = mono_spillvar_offset (cfg, spill);
1284         if (item->prev) {
1285                 while (ins->next != item->prev->data)
1286                         ins = ins->next;
1287         }
1288         load->next = ins->next;
1289         ins->next = load;
1290         DEBUG (g_print ("\tSPILLED LOAD (%d at 0x%08x(%%ebp)) R%d (freed %s)\n", spill, load->inst_offset, i, mono_arch_regname (sel)));
1291         i = mono_regstate_alloc_int (cfg->rs, 1 << sel);
1292         g_assert (i == sel);
1293         
1294         return sel;
1295 }
1296
1297 static MonoInst*
1298 create_copy_ins (MonoCompile *cfg, int dest, int src, MonoInst *ins)
1299 {
1300         MonoInst *copy;
1301         MONO_INST_NEW (cfg, copy, OP_MOVE);
1302         copy->dreg = dest;
1303         copy->sreg1 = src;
1304         if (ins) {
1305                 copy->next = ins->next;
1306                 ins->next = copy;
1307         }
1308         DEBUG (g_print ("\tforced copy from %s to %s\n", mono_arch_regname (src), mono_arch_regname (dest)));
1309         return copy;
1310 }
1311
1312 static MonoInst*
1313 create_spilled_store (MonoCompile *cfg, int spill, int reg, int prev_reg, MonoInst *ins)
1314 {
1315         MonoInst *store;
1316         MONO_INST_NEW (cfg, store, OP_STORE_MEMBASE_REG);
1317         store->sreg1 = reg;
1318         store->inst_destbasereg = X86_EBP;
1319         store->inst_offset = mono_spillvar_offset (cfg, spill);
1320         if (ins) {
1321                 store->next = ins->next;
1322                 ins->next = store;
1323         }
1324         DEBUG (g_print ("\tSPILLED STORE (%d at 0x%08x(%%ebp)) R%d (from %s)\n", spill, store->inst_offset, prev_reg, mono_arch_regname (reg)));
1325         return store;
1326 }
1327
1328 static void
1329 insert_before_ins (MonoInst *ins, InstList *item, MonoInst* to_insert)
1330 {
1331         MonoInst *prev;
1332         if (item->next) {
1333                 prev = item->next->data;
1334
1335                 while (prev->next != ins)
1336                         prev = prev->next;
1337                 to_insert->next = ins;
1338                 prev->next = to_insert;
1339         } else {
1340                 to_insert->next = ins;
1341         }
1342         /* 
1343          * needed otherwise in the next instruction we can add an ins to the 
1344          * end and that would get past this instruction.
1345          */
1346         item->data = to_insert; 
1347 }
1348
1349
#if 0
/*
 * Disabled code, never compiled.
 * Returns the hard register assigned to @sym_reg, allocating one from
 * @allow_mask (spilling another register if none is free) when the current
 * assignment is negative. A previous assignment below -1 encodes a spill
 * slot as -slot - 1, in which case a store to that slot is added after @ins.
 */
static int
alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg, guint32 allow_mask)
{
	int hreg = cfg->rs->iassign [sym_reg];

	if (hreg < 0) {
		/* the register gets spilled after this inst when a slot is encoded */
		int spill = (hreg < -1) ? -hreg - 1 : 0;

		hreg = mono_regstate_alloc_int (cfg->rs, allow_mask);
		if (hreg < 0)
			hreg = get_register_spilling (cfg, curinst, ins, allow_mask, sym_reg);
		cfg->rs->iassign [sym_reg] = hreg;
		/* add option to store before the instruction for src registers */
		if (spill)
			create_spilled_store (cfg, spill, hreg, sym_reg, ins);
	}
	cfg->rs->isymbolic [hreg] = sym_reg;
	return hreg;
}
#endif
1373
/*
 * Bits stored in RegTrack.flags (reginfo->flags).
 * The FP bits record pending spill/load work for floating point values,
 * the REG bits are hints for the integer register allocator.
 */
enum {
	/* fp source needs a load and a new spill entry was queued for it */
	MONO_X86_FP_NEEDS_LOAD_SPILL = 1 << 0,
	/* NOTE(review): not set in the code visible here -- confirm usage */
	MONO_X86_FP_NEEDS_SPILL      = 1 << 1,
	/* fp source has to be loaded back from a spill entry */
	MONO_X86_FP_NEEDS_LOAD       = 1 << 2,
	/* any register of the mask except ECX */
	MONO_X86_REG_NOT_ECX         = 1 << 3,
	/* try EAX first */
	MONO_X86_REG_EAX             = 1 << 4,
	/* try EDX first */
	MONO_X86_REG_EDX             = 1 << 5,
	/* try ECX first */
	MONO_X86_REG_ECX             = 1 << 6
};
1384
1385 static int
1386 mono_x86_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
1387 {
1388         int val;
1389         int test_mask = dest_mask;
1390
1391         if (flags & MONO_X86_REG_EAX)
1392                 test_mask &= (1 << X86_EAX);
1393         else if (flags & MONO_X86_REG_EDX)
1394                 test_mask &= (1 << X86_EDX);
1395         else if (flags & MONO_X86_REG_ECX)
1396                 test_mask &= (1 << X86_ECX);
1397         else if (flags & MONO_X86_REG_NOT_ECX)
1398                 test_mask &= ~ (1 << X86_ECX);
1399
1400         val = mono_regstate_alloc_int (cfg->rs, test_mask);
1401         if (val >= 0 && test_mask != dest_mask)
1402                 DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
1403
1404         if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
1405                 DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but exluding ECX\n", test_mask));
1406                 val = mono_regstate_alloc_int (cfg->rs, (dest_mask & (~1 << X86_ECX)));
1407         }
1408
1409         if (val < 0) {
1410                 val = mono_regstate_alloc_int (cfg->rs, dest_mask);
1411                 if (val < 0)
1412                         val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
1413         }
1414
1415         return val;
1416 }
1417
1418 static inline void
1419 assign_ireg (MonoRegState *rs, int reg, int hreg)
1420 {
1421         g_assert (reg >= MONO_MAX_IREGS);
1422         g_assert (hreg < MONO_MAX_IREGS);
1423         g_assert (! is_global_ireg (hreg));
1424
1425         rs->iassign [reg] = hreg;
1426         rs->isymbolic [hreg] = reg;
1427         rs->ifree_mask &= ~ (1 << hreg);
1428 }
1429
1430 /*#include "cprop.c"*/
1431
/*
 * Local register allocation.
 * We first scan the list of instructions and we save the liveness info of
 * each register (when the register is first used, when its value is set etc.).
 * We also reverse the list of instructions (in the InstList list) because assigning
 * registers backwards allows for more tricks to be used.
 */
1439 void
1440 mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
1441 {
1442         MonoInst *ins;
1443         MonoRegState *rs = cfg->rs;
1444         int i, val, fpcount;
1445         RegTrack *reginfo, *reginfof;
1446         RegTrack *reginfo1, *reginfo2, *reginfod;
1447         InstList *tmp, *reversed = NULL;
1448         const char *spec;
1449         guint32 src1_mask, src2_mask, dest_mask;
1450         GList *fspill_list = NULL;
1451         int fspill = 0;
1452
1453         if (!bb->code)
1454                 return;
1455         rs->next_vireg = bb->max_ireg;
1456         rs->next_vfreg = bb->max_freg;
1457         mono_regstate_assign (rs);
1458         reginfo = g_malloc0 (sizeof (RegTrack) * rs->next_vireg);
1459         reginfof = g_malloc0 (sizeof (RegTrack) * rs->next_vfreg);
1460         rs->ifree_mask = X86_CALLEE_REGS;
1461
1462         ins = bb->code;
1463
1464         /*if (cfg->opt & MONO_OPT_COPYPROP)
1465                 local_copy_prop (cfg, ins);*/
1466
1467         i = 1;
1468         fpcount = 0;
1469         DEBUG (g_print ("LOCAL regalloc: basic block: %d\n", bb->block_num));
1470         /* forward pass on the instructions to collect register liveness info */
1471         while (ins) {
1472                 spec = ins_spec [ins->opcode];
1473                 
1474                 DEBUG (print_ins (i, ins));
1475
1476                 if (spec [MONO_INST_SRC1]) {
1477                         if (spec [MONO_INST_SRC1] == 'f') {
1478                                 GList *spill;
1479                                 reginfo1 = reginfof;
1480
1481                                 spill = g_list_first (fspill_list);
1482                                 if (spill && fpcount < MONO_MAX_FREGS) {
1483                                         reginfo1 [ins->sreg1].flags |= MONO_X86_FP_NEEDS_LOAD;
1484                                         fspill_list = g_list_remove (fspill_list, spill->data);
1485                                 } else
1486                                         fpcount--;
1487                         }
1488                         else
1489                                 reginfo1 = reginfo;
1490                         reginfo1 [ins->sreg1].prev_use = reginfo1 [ins->sreg1].last_use;
1491                         reginfo1 [ins->sreg1].last_use = i;
1492                         if (spec [MONO_INST_SRC1] == 'L') {
1493                                 /* The virtual register is allocated sequentially */
1494                                 reginfo1 [ins->sreg1 + 1].prev_use = reginfo1 [ins->sreg1 + 1].last_use;
1495                                 reginfo1 [ins->sreg1 + 1].last_use = i;
1496                                 if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
1497                                         reginfo1 [ins->sreg1 + 1].born_in = i;
1498
1499                                 reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
1500                                 reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
1501                         }
1502                 } else {
1503                         ins->sreg1 = -1;
1504                 }
1505                 if (spec [MONO_INST_SRC2]) {
1506                         if (spec [MONO_INST_SRC2] == 'f') {
1507                                 GList *spill;
1508                                 reginfo2 = reginfof;
1509                                 spill = g_list_first (fspill_list);
1510                                 if (spill) {
1511                                         reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD;
1512                                         fspill_list = g_list_remove (fspill_list, spill->data);
1513                                         if (fpcount >= MONO_MAX_FREGS) {
1514                                                 fspill++;
1515                                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1516                                                 reginfo2 [ins->sreg2].flags |= MONO_X86_FP_NEEDS_LOAD_SPILL;
1517                                         }
1518                                 } else
1519                                         fpcount--;
1520                         }
1521                         else
1522                                 reginfo2 = reginfo;
1523                         reginfo2 [ins->sreg2].prev_use = reginfo2 [ins->sreg2].last_use;
1524                         reginfo2 [ins->sreg2].last_use = i;
1525                         if (spec [MONO_INST_SRC2] == 'L') {
1526                                 /* The virtual register is allocated sequentially */
1527                                 reginfo2 [ins->sreg2 + 1].prev_use = reginfo2 [ins->sreg2 + 1].last_use;
1528                                 reginfo2 [ins->sreg2 + 1].last_use = i;
1529                                 if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
1530                                         reginfo2 [ins->sreg2 + 1].born_in = i;
1531                         }
1532                         if (spec [MONO_INST_CLOB] == 's') {
1533                                 reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
1534                                 reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
1535                         }
1536                 } else {
1537                         ins->sreg2 = -1;
1538                 }
1539                 if (spec [MONO_INST_DEST]) {
1540                         if (spec [MONO_INST_DEST] == 'f') {
1541                                 reginfod = reginfof;
1542                                 if (fpcount >= MONO_MAX_FREGS) {
1543                                         reginfod [ins->dreg].flags |= MONO_X86_FP_NEEDS_SPILL;
1544                                         fspill++;
1545                                         fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1546                                         fpcount--;
1547                                 }
1548                                 fpcount++;
1549                         }
1550                         else
1551                                 reginfod = reginfo;
1552                         if (spec [MONO_INST_DEST] != 'b') /* it's not just a base register */
1553                                 reginfod [ins->dreg].killed_in = i;
1554                         reginfod [ins->dreg].prev_use = reginfod [ins->dreg].last_use;
1555                         reginfod [ins->dreg].last_use = i;
1556                         if (reginfod [ins->dreg].born_in == 0 || reginfod [ins->dreg].born_in > i)
1557                                 reginfod [ins->dreg].born_in = i;
1558                         if (spec [MONO_INST_DEST] == 'l' || spec [MONO_INST_DEST] == 'L') {
1559                                 /* The virtual register is allocated sequentially */
1560                                 reginfod [ins->dreg + 1].prev_use = reginfod [ins->dreg + 1].last_use;
1561                                 reginfod [ins->dreg + 1].last_use = i;
1562                                 if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
1563                                         reginfod [ins->dreg + 1].born_in = i;
1564
1565                                 reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
1566                                 reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
1567                         }
1568                 } else {
1569                         ins->dreg = -1;
1570                 }
1571
1572                 reversed = inst_list_prepend (cfg->mempool, reversed, ins);
1573                 ++i;
1574                 ins = ins->next;
1575         }
1576
1577         // todo: check if we have anything left on fp stack, in verify mode?
1578         fspill = 0;
1579
1580         DEBUG (print_regtrack (reginfo, rs->next_vireg));
1581         DEBUG (print_regtrack (reginfof, rs->next_vfreg));
1582         tmp = reversed;
1583         while (tmp) {
1584                 int prev_dreg, prev_sreg1, prev_sreg2, clob_dreg;
1585                 dest_mask = src1_mask = src2_mask = X86_CALLEE_REGS;
1586                 --i;
1587                 ins = tmp->data;
1588                 spec = ins_spec [ins->opcode];
1589                 prev_dreg = -1;
1590                 clob_dreg = -1;
1591                 DEBUG (g_print ("processing:"));
1592                 DEBUG (print_ins (i, ins));
1593                 if (spec [MONO_INST_CLOB] == 's') {
1594                         /*
1595                          * Shift opcodes, SREG2 must be ECX
1596                          */
1597                         if (rs->ifree_mask & (1 << X86_ECX)) {
1598                                 if (ins->sreg2 < MONO_MAX_IREGS) {
1599                                         /* Argument already in hard reg, need to copy */
1600                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
1601                                         insert_before_ins (ins, tmp, copy);
1602                                 }
1603                                 else {
1604                                         DEBUG (g_print ("\tshortcut assignment of R%d to ECX\n", ins->sreg2));
1605                                         assign_ireg (rs, ins->sreg2, X86_ECX);
1606                                 }
1607                         } else {
1608                                 int need_ecx_spill = TRUE;
1609                                 /* 
1610                                  * we first check if src1/dreg is already assigned a register
1611                                  * and then we force a spill of the var assigned to ECX.
1612                                  */
1613                                 /* the destination register can't be ECX */
1614                                 dest_mask &= ~ (1 << X86_ECX);
1615                                 src1_mask &= ~ (1 << X86_ECX);
1616                                 val = rs->iassign [ins->dreg];
1617                                 /* 
1618                                  * the destination register is already assigned to ECX:
1619                                  * we need to allocate another register for it and then
1620                                  * copy from this to ECX.
1621                                  */
1622                                 if (val == X86_ECX && ins->dreg != ins->sreg2) {
1623                                         int new_dest;
1624                                         new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
1625                                         g_assert (new_dest >= 0);
1626                                         DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
1627
1628                                         rs->isymbolic [new_dest] = ins->dreg;
1629                                         rs->iassign [ins->dreg] = new_dest;
1630                                         clob_dreg = ins->dreg;
1631                                         ins->dreg = new_dest;
1632                                         create_copy_ins (cfg, X86_ECX, new_dest, ins);
1633                                         need_ecx_spill = FALSE;
1634                                         /*DEBUG (g_print ("\tforced spill of R%d\n", ins->dreg));
1635                                         val = get_register_force_spilling (cfg, tmp, ins, ins->dreg);
1636                                         rs->iassign [ins->dreg] = val;
1637                                         rs->isymbolic [val] = prev_dreg;
1638                                         ins->dreg = val;*/
1639                                 }
1640                                 if (is_global_ireg (ins->sreg2)) {
1641                                         MonoInst *copy = create_copy_ins (cfg, X86_ECX, ins->sreg2, NULL);
1642                                         insert_before_ins (ins, tmp, copy);
1643                                 }
1644                                 else {
1645                                         val = rs->iassign [ins->sreg2];
1646                                         if (val >= 0 && val != X86_ECX) {
1647                                                 MonoInst *move = create_copy_ins (cfg, X86_ECX, val, NULL);
1648                                                 DEBUG (g_print ("\tmoved arg from R%d (%d) to ECX\n", val, ins->sreg2));
1649                                                 move->next = ins;
1650                                                 g_assert_not_reached ();
1651                                                 /* FIXME: where is move connected to the instruction list? */
1652                                                 //tmp->prev->data->next = move;
1653                                         }
1654                                         else {
1655                                                 if (val == X86_ECX)
1656                                                 need_ecx_spill = FALSE;
1657                                         }
1658                                 }
1659                                 if (need_ecx_spill && !(rs->ifree_mask & (1 << X86_ECX))) {
1660                                         DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_ECX]));
1661                                         get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_ECX]);
1662                                         mono_regstate_free_int (rs, X86_ECX);
1663                                 }
1664                                 if (!is_global_ireg (ins->sreg2))
1665                                         /* force-set sreg2 */
1666                                         assign_ireg (rs, ins->sreg2, X86_ECX);
1667                         }
1668                         ins->sreg2 = X86_ECX;
1669                 } else if (spec [MONO_INST_CLOB] == 'd') {
1670                         /*
1671                          * DIVISION/REMAINDER
1672                          */
1673                         int dest_reg = X86_EAX;
1674                         int clob_reg = X86_EDX;
1675                         if (spec [MONO_INST_DEST] == 'd') {
1676                                 dest_reg = X86_EDX; /* reminder */
1677                                 clob_reg = X86_EAX;
1678                         }
1679                         if (is_global_ireg (ins->dreg))
1680                                 val = ins->dreg;
1681                         else
1682                                 val = rs->iassign [ins->dreg];
1683                         if (0 && val >= 0 && val != dest_reg && !(rs->ifree_mask & (1 << dest_reg))) {
1684                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
1685                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
1686                                 mono_regstate_free_int (rs, dest_reg);
1687                         }
1688                         if (val < 0) {
1689                                 if (val < -1) {
1690                                         /* the register gets spilled after this inst */
1691                                         int spill = -val -1;
1692                                         dest_mask = 1 << clob_reg;
1693                                         prev_dreg = ins->dreg;
1694                                         val = mono_regstate_alloc_int (rs, dest_mask);
1695                                         if (val < 0)
1696                                                 val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
1697                                         rs->iassign [ins->dreg] = val;
1698                                         if (spill)
1699                                                 create_spilled_store (cfg, spill, val, prev_dreg, ins);
1700                                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
1701                                         rs->isymbolic [val] = prev_dreg;
1702                                         ins->dreg = val;
1703                                         if (val != dest_reg) { /* force a copy */
1704                                                 create_copy_ins (cfg, val, dest_reg, ins);
1705                                         }
1706                                 } else {
1707                                         DEBUG (g_print ("\tshortcut assignment of R%d to %s\n", ins->dreg, mono_arch_regname (dest_reg)));
1708                                         prev_dreg = ins->dreg;
1709                                         assign_ireg (rs, ins->dreg, dest_reg);
1710                                         ins->dreg = dest_reg;
1711                                 }
1712                         } else {
1713                                 //DEBUG (g_print ("dest reg in div assigned: %s\n", mono_arch_regname (val)));
1714                                 if (val != dest_reg) { /* force a copy */
1715                                         create_copy_ins (cfg, val, dest_reg, ins);
1716                                         if (!(rs->ifree_mask & (1 << dest_reg)) && rs->isymbolic [dest_reg] >= MONO_MAX_IREGS) {
1717                                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [dest_reg]));
1718                                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [dest_reg]);
1719                                                 mono_regstate_free_int (rs, dest_reg);
1720                                         }
1721                                 }
1722                         }
1723                         if (!(rs->ifree_mask & (1 << clob_reg)) && (clob_reg != val) && (rs->isymbolic [clob_reg] >= 8)) {
1724                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
1725                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
1726                                 mono_regstate_free_int (rs, clob_reg);
1727                         }
1728                         src1_mask = 1 << X86_EAX;
1729                         src2_mask = 1 << X86_ECX;
1730                 } else if (spec [MONO_INST_DEST] == 'l') {
1731                         int hreg;
1732                         val = rs->iassign [ins->dreg];
1733                         /* check special case when dreg have been moved from ecx (clob shift) */
1734                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
1735                                 hreg = clob_dreg + 1;
1736                         else
1737                                 hreg = ins->dreg + 1;
1738
1739                         /* base prev_dreg on fixed hreg, handle clob case */
1740                         val = hreg - 1;
1741
1742                         if (val != rs->isymbolic [X86_EAX] && !(rs->ifree_mask & (1 << X86_EAX))) {
1743                                 DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [X86_EAX]));
1744                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
1745                                 mono_regstate_free_int (rs, X86_EAX);
1746                         }
1747                         if (hreg != rs->isymbolic [X86_EDX] && !(rs->ifree_mask & (1 << X86_EDX))) {
1748                                 DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [X86_EDX]));
1749                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
1750                                 mono_regstate_free_int (rs, X86_EDX);
1751                         }
1752                 } else if (spec [MONO_INST_CLOB] == 'b') {
1753                         /*
1754                          * x86_set_reg instructions, dreg needs to be EAX..EDX
1755                          */     
1756                         dest_mask = (1 << X86_EAX) | (1 << X86_EBX) | (1 << X86_ECX) | (1 << X86_EDX);
1757                         if ((ins->dreg < MONO_MAX_IREGS) && (! (dest_mask & (1 << ins->dreg)))) {
1758                                 /* 
1759                                  * ins->dreg is already a hard reg, need to allocate another
1760                                  * suitable hard reg and make a copy.
1761                                  */
1762                                 int new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
1763                                 g_assert (new_dest >= 0);
1764
1765                                 create_copy_ins (cfg, ins->dreg, new_dest, ins);
1766                                 DEBUG (g_print ("\tclob:b changing dreg R%d to %s\n", ins->dreg, mono_arch_regname (new_dest)));
1767                                 ins->dreg = new_dest;
1768
1769                                 /* The hard reg is no longer needed */
1770                                 mono_regstate_free_int (rs, new_dest);
1771                         }
1772                 }
1773
1774                 /*
1775                  * TRACK DREG
1776                  */
1777                 if (spec [MONO_INST_DEST] == 'f') {
1778                         if (reginfof [ins->dreg].flags & MONO_X86_FP_NEEDS_SPILL) {
1779                                 GList *spill_node;
1780                                 MonoInst *store;
1781                                 spill_node = g_list_first (fspill_list);
1782                                 g_assert (spill_node);
1783
1784                                 store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->dreg, ins);
1785                                 insert_before_ins (ins, tmp, store);
1786                                 fspill_list = g_list_remove (fspill_list, spill_node->data);
1787                                 fspill--;
1788                         }
1789                 } else if (spec [MONO_INST_DEST] == 'L') {
1790                         int hreg;
1791                         val = rs->iassign [ins->dreg];
1792                         /* check special case when dreg have been moved from ecx (clob shift) */
1793                         if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
1794                                 hreg = clob_dreg + 1;
1795                         else
1796                                 hreg = ins->dreg + 1;
1797
1798                         /* base prev_dreg on fixed hreg, handle clob case */
1799                         prev_dreg = hreg - 1;
1800
1801                         if (val < 0) {
1802                                 int spill = 0;
1803                                 if (val < -1) {
1804                                         /* the register gets spilled after this inst */
1805                                         spill = -val -1;
1806                                 }
1807                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
1808                                 rs->iassign [ins->dreg] = val;
1809                                 if (spill)
1810                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
1811                         }
1812
1813                         DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
1814  
1815                         rs->isymbolic [val] = hreg - 1;
1816                         ins->dreg = val;
1817                         
1818                         val = rs->iassign [hreg];
1819                         if (val < 0) {
1820                                 int spill = 0;
1821                                 if (val < -1) {
1822                                         /* the register gets spilled after this inst */
1823                                         spill = -val -1;
1824                                 }
1825                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
1826                                 rs->iassign [hreg] = val;
1827                                 if (spill)
1828                                         create_spilled_store (cfg, spill, val, hreg, ins);
1829                         }
1830
1831                         DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
1832                         rs->isymbolic [val] = hreg;
1833                         /* save reg allocating into unused */
1834                         ins->unused = val;
1835
1836                         /* check if we can free our long reg */
1837                         if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
1838                                 DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
1839                                 mono_regstate_free_int (rs, val);
1840                         }
1841                 }
1842                 else if (ins->dreg >= MONO_MAX_IREGS) {
1843                         int hreg;
1844                         val = rs->iassign [ins->dreg];
1845                         if (spec [MONO_INST_DEST] == 'l') {
1846                                 /* check special case when dreg have been moved from ecx (clob shift) */
1847                                 if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
1848                                         hreg = clob_dreg + 1;
1849                                 else
1850                                         hreg = ins->dreg + 1;
1851
1852                                 /* base prev_dreg on fixed hreg, handle clob case */
1853                                 prev_dreg = hreg - 1;
1854                         } else
1855                                 prev_dreg = ins->dreg;
1856
1857                         if (val < 0) {
1858                                 int spill = 0;
1859                                 if (val < -1) {
1860                                         /* the register gets spilled after this inst */
1861                                         spill = -val -1;
1862                                 }
1863                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
1864                                 rs->iassign [ins->dreg] = val;
1865                                 if (spill)
1866                                         create_spilled_store (cfg, spill, val, prev_dreg, ins);
1867                         }
1868                         DEBUG (g_print ("\tassigned dreg %s to dest R%d\n", mono_arch_regname (val), ins->dreg));
1869                         rs->isymbolic [val] = prev_dreg;
1870                         ins->dreg = val;
1871                         /* handle cases where lreg needs to be eax:edx */
1872                         if (spec [MONO_INST_DEST] == 'l') {
1873                                 /* check special case when dreg have been moved from ecx (clob shift) */
1874                                 int hreg = prev_dreg + 1;
1875                                 val = rs->iassign [hreg];
1876                                 if (val < 0) {
1877                                         int spill = 0;
1878                                         if (val < -1) {
1879                                                 /* the register gets spilled after this inst */
1880                                                 spill = -val -1;
1881                                         }
1882                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
1883                                         rs->iassign [hreg] = val;
1884                                         if (spill)
1885                                                 create_spilled_store (cfg, spill, val, hreg, ins);
1886                                 }
1887                                 DEBUG (g_print ("\tassigned hreg %s to dest R%d\n", mono_arch_regname (val), hreg));
1888                                 rs->isymbolic [val] = hreg;
1889                                 if (ins->dreg == X86_EAX) {
1890                                         if (val != X86_EDX)
1891                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1892                                 } else if (ins->dreg == X86_EDX) {
1893                                         if (val == X86_EAX) {
1894                                                 /* swap */
1895                                                 g_assert_not_reached ();
1896                                         } else {
1897                                                 /* two forced copies */
1898                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1899                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1900                                         }
1901                                 } else {
1902                                         if (val == X86_EDX) {
1903                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1904                                         } else {
1905                                                 /* two forced copies */
1906                                                 create_copy_ins (cfg, val, X86_EDX, ins);
1907                                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1908                                         }
1909                                 }
1910                                 if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
1911                                         DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
1912                                         mono_regstate_free_int (rs, val);
1913                                 }
1914                         } else if (spec [MONO_INST_DEST] == 'a' && ins->dreg != X86_EAX && spec [MONO_INST_CLOB] != 'd') {
1915                                 /* this instruction only outputs to EAX, need to copy */
1916                                 create_copy_ins (cfg, ins->dreg, X86_EAX, ins);
1917                         } else if (spec [MONO_INST_DEST] == 'd' && ins->dreg != X86_EDX && spec [MONO_INST_CLOB] != 'd') {
1918                                 create_copy_ins (cfg, ins->dreg, X86_EDX, ins);
1919                         }
1920                 }
1921                 if (spec [MONO_INST_DEST] != 'f' && reg_is_freeable (ins->dreg) && prev_dreg >= 0 && reginfo [prev_dreg].born_in >= i) {
1922                         DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), prev_dreg, reginfo [prev_dreg].born_in));
1923                         mono_regstate_free_int (rs, ins->dreg);
1924                 }
1925                 /* put src1 in EAX if it needs to be */
1926                 if (spec [MONO_INST_SRC1] == 'a') {
1927                         if (!(rs->ifree_mask & (1 << X86_EAX))) {
1928                                 DEBUG (g_print ("\tforced spill of R%d\n", rs->isymbolic [X86_EAX]));
1929                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
1930                                 mono_regstate_free_int (rs, X86_EAX);
1931                         }
1932                         if (ins->sreg1 < MONO_MAX_IREGS) {
1933                                 /* The argument is already in a hard reg, need to copy */
1934                                 MonoInst *copy = create_copy_ins (cfg, X86_EAX, ins->sreg1, NULL);
1935                                 insert_before_ins (ins, tmp, copy);
1936                         }
1937                         else
1938                                 /* force-set sreg1 */
1939                                 assign_ireg (rs, ins->sreg1, X86_EAX);
1940                         ins->sreg1 = X86_EAX;
1941                 }
1942
1943                 /*
1944                  * TRACK SREG1
1945                  */
1946                 if (spec [MONO_INST_SRC1] == 'f') {
1947                         if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD) {
1948                                 MonoInst *load;
1949                                 MonoInst *store = NULL;
1950
1951                                 if (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
1952                                         GList *spill_node;
1953                                         spill_node = g_list_first (fspill_list);
1954                                         g_assert (spill_node);
1955
1956                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg1, ins);          
1957                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
1958                                 }
1959
1960                                 fspill++;
1961                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
1962                                 load = create_spilled_load_float (cfg, fspill, ins->sreg1, ins);
1963                                 insert_before_ins (ins, tmp, load);
1964                                 if (store) 
1965                                         insert_before_ins (load, tmp, store);
1966                         }
1967                 } else if ((spec [MONO_INST_DEST] == 'L') && (spec [MONO_INST_SRC1] == 'L')) {
1968                         /* force source to be same as dest */
1969                         assign_ireg (rs, ins->sreg1, ins->dreg);
1970                         assign_ireg (rs, ins->sreg1 + 1, ins->unused);
1971
1972                         DEBUG (g_print ("\tassigned sreg1 (long) %s to sreg1 R%d\n", mono_arch_regname (ins->dreg), ins->sreg1));
1973                         DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
1974
1975                         ins->sreg1 = ins->dreg;
1976                         /* 
1977                          * No need for saving the reg, we know that src1=dest in this cases
1978                          * ins->inst_c0 = ins->unused;
1979                          */
1980                 }
1981                 else if (ins->sreg1 >= MONO_MAX_IREGS) {
1982                         val = rs->iassign [ins->sreg1];
1983                         prev_sreg1 = ins->sreg1;
1984                         if (val < 0) {
1985                                 int spill = 0;
1986                                 if (val < -1) {
1987                                         /* the register gets spilled after this inst */
1988                                         spill = -val -1;
1989                                 }
1990                                 if (0 && ins->opcode == OP_MOVE) {
1991                                         /* 
1992                                          * small optimization: the dest register is already allocated
1993                                          * but the src one is not: we can simply assign the same register
1994                                          * here and peephole will get rid of the instruction later.
1995                                          * This optimization may interfere with the clobbering handling:
1996                                          * it removes a mov operation that will be added again to handle clobbering.
1997                                          * There are also some other issues that show up with make testjit.
1998                                          */
1999                                         mono_regstate_alloc_int (rs, 1 << ins->dreg);
2000                                         val = rs->iassign [ins->sreg1] = ins->dreg;
2001                                         //g_assert (val >= 0);
2002                                         DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2003                                 } else {
2004                                         //g_assert (val == -1); /* source cannot be spilled */
2005                                         val = mono_x86_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
2006                                         rs->iassign [ins->sreg1] = val;
2007                                         DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
2008                                 }
2009                                 if (spill) {
2010                                         MonoInst *store = create_spilled_store (cfg, spill, val, prev_sreg1, NULL);
2011                                         insert_before_ins (ins, tmp, store);
2012                                 }
2013                         }
2014                         rs->isymbolic [val] = prev_sreg1;
2015                         ins->sreg1 = val;
2016                 } else {
2017                         prev_sreg1 = -1;
2018                 }
2019                 /* handle clobbering of sreg1 */
2020                 if ((spec [MONO_INST_CLOB] == '1' || spec [MONO_INST_CLOB] == 's') && ins->dreg != ins->sreg1) {
2021                         MonoInst *sreg2_copy = NULL;
2022
2023                         if (ins->dreg == ins->sreg2) {
2024                                 /* 
2025                                  * copying sreg1 to dreg could clobber sreg2, so allocate a new
2026                                  * register for it.
2027                                  */
2028                                 int reg2 = 0;
2029
2030                                 reg2 = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->sreg2, 0);
2031
2032                                 DEBUG (g_print ("\tneed to copy sreg2 %s to reg %s\n", mono_arch_regname (ins->sreg2), mono_arch_regname (reg2)));
2033                                 sreg2_copy = create_copy_ins (cfg, reg2, ins->sreg2, NULL);
2034                                 prev_sreg2 = ins->sreg2 = reg2;
2035
2036                                 mono_regstate_free_int (rs, reg2);
2037                         }
2038
2039                         MonoInst *copy = create_copy_ins (cfg, ins->dreg, ins->sreg1, NULL);
2040                         DEBUG (g_print ("\tneed to copy sreg1 %s to dreg %s\n", mono_arch_regname (ins->sreg1), mono_arch_regname (ins->dreg)));
2041                         insert_before_ins (ins, tmp, copy);
2042
2043                         if (sreg2_copy)
2044                                 insert_before_ins (copy, tmp, sreg2_copy);
2045
2046                         /*
2047                          * Need to prevent sreg2 to be allocated to sreg1, since that
2048                          * would screw up the previous copy.
2049                          */
2050                         src2_mask &= ~ (1 << ins->sreg1);
2051                         /* we set sreg1 to dest as well */
2052                         prev_sreg1 = ins->sreg1 = ins->dreg;
2053                         src2_mask &= ~ (1 << ins->dreg);
2054                 }
2055
2056                 /*
2057                  * TRACK SREG2
2058                  */
2059                 if (spec [MONO_INST_SRC2] == 'f') {
2060                         if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD) {
2061                                 MonoInst *load;
2062                                 MonoInst *store = NULL;
2063
2064                                 if (reginfof [ins->sreg2].flags & MONO_X86_FP_NEEDS_LOAD_SPILL) {
2065                                         GList *spill_node;
2066
2067                                         spill_node = g_list_first (fspill_list);
2068                                         g_assert (spill_node);
2069                                         if (spec [MONO_INST_SRC1] == 'f' && (reginfof [ins->sreg1].flags & MONO_X86_FP_NEEDS_LOAD_SPILL))
2070                                                 spill_node = g_list_next (spill_node);
2071         
2072                                         store = create_spilled_store_float (cfg, GPOINTER_TO_INT (spill_node->data), ins->sreg2, ins);
2073                                         fspill_list = g_list_remove (fspill_list, spill_node->data);
2074                                 } 
2075                                 
2076                                 fspill++;
2077                                 fspill_list = g_list_prepend (fspill_list, GINT_TO_POINTER(fspill));
2078                                 load = create_spilled_load_float (cfg, fspill, ins->sreg2, ins);
2079                                 insert_before_ins (ins, tmp, load);
2080                                 if (store) 
2081                                         insert_before_ins (load, tmp, store);
2082                         }
2083                 } 
2084                 else if (ins->sreg2 >= MONO_MAX_IREGS) {
2085                         val = rs->iassign [ins->sreg2];
2086                         prev_sreg2 = ins->sreg2;
2087                         if (val < 0) {
2088                                 int spill = 0;
2089                                 if (val < -1) {
2090                                         /* the register gets spilled after this inst */
2091                                         spill = -val -1;
2092                                 }
2093                                 val = mono_x86_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
2094                                 rs->iassign [ins->sreg2] = val;
2095                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
2096                                 if (spill)
2097                                         create_spilled_store (cfg, spill, val, prev_sreg2, ins);
2098                         }
2099                         rs->isymbolic [val] = prev_sreg2;
2100                         ins->sreg2 = val;
2101                         if (spec [MONO_INST_CLOB] == 's' && ins->sreg2 != X86_ECX) {
2102                                 DEBUG (g_print ("\tassigned sreg2 %s to R%d, but ECX is needed (R%d)\n", mono_arch_regname (val), ins->sreg2, rs->iassign [X86_ECX]));
2103                         }
2104                 } else {
2105                         prev_sreg2 = -1;
2106                 }
2107
2108                 if (spec [MONO_INST_CLOB] == 'c') {
2109                         int j, s;
2110                         guint32 clob_mask = X86_CALLEE_REGS;
2111                         for (j = 0; j < MONO_MAX_IREGS; ++j) {
2112                                 s = 1 << j;
2113                                 if ((clob_mask & s) && !(rs->ifree_mask & s) && j != ins->sreg1) {
2114                                         //g_warning ("register %s busy at call site\n", mono_arch_regname (j));
2115                                 }
2116                         }
2117                 }
2118                 if (spec [MONO_INST_CLOB] == 'a') {
2119                         guint32 clob_reg = X86_EAX;
2120                         if (!(rs->ifree_mask & (1 << clob_reg)) && (rs->isymbolic [clob_reg] >= 8)) {
2121                                 DEBUG (g_print ("\tforced spill of clobbered reg R%d\n", rs->isymbolic [clob_reg]));
2122                                 get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [clob_reg]);
2123                                 mono_regstate_free_int (rs, clob_reg);
2124                         }
2125                 }
2126                 /*if (reg_is_freeable (ins->sreg1) && prev_sreg1 >= 0 && reginfo [prev_sreg1].born_in >= i) {
2127                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg1)));
2128                         mono_regstate_free_int (rs, ins->sreg1);
2129                 }
2130                 if (reg_is_freeable (ins->sreg2) && prev_sreg2 >= 0 && reginfo [prev_sreg2].born_in >= i) {
2131                         DEBUG (g_print ("freeable %s\n", mono_arch_regname (ins->sreg2)));
2132                         mono_regstate_free_int (rs, ins->sreg2);
2133                 }*/
2134         
2135                 //DEBUG (print_ins (i, ins));
2136                 /* this may result from a insert_before call */
2137                 if (!tmp->next)
2138                         bb->code = tmp->data;
2139                 tmp = tmp->next;
2140         }
2141
2142         g_free (reginfo);
2143         g_free (reginfof);
2144         g_list_free (fspill_list);
2145 }
2146
2147 static unsigned char*
2148 emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
2149 {
2150         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2151         x86_fnstcw_membase(code, X86_ESP, 0);
2152         x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
2153         x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
2154         x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
2155         x86_fldcw_membase (code, X86_ESP, 2);
2156         if (size == 8) {
2157                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2158                 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2159                 x86_pop_reg (code, dreg);
2160                 /* FIXME: need the high register 
2161                  * x86_pop_reg (code, dreg_high);
2162                  */
2163         } else {
2164                 x86_push_reg (code, X86_EAX); // SP = SP - 4
2165                 x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
2166                 x86_pop_reg (code, dreg);
2167         }
2168         x86_fldcw_membase (code, X86_ESP, 0);
2169         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2170
2171         if (size == 1)
2172                 x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
2173         else if (size == 2)
2174                 x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
2175         return code;
2176 }
2177
2178 static unsigned char*
2179 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
2180 {
2181         int sreg = tree->sreg1;
2182 #ifdef PLATFORM_WIN32
2183         guint8* br[5];
2184
2185         /*
2186          * Under Windows:
2187          * If requested stack size is larger than one page,
2188          * perform stack-touch operation
2189          */
2190         /*
2191          * Generate stack probe code.
2192          * Under Windows, it is necessary to allocate one page at a time,
2193          * "touching" stack after each successful sub-allocation. This is
2194          * because of the way stack growth is implemented - there is a
2195          * guard page before the lowest stack page that is currently commited.
2196          * Stack normally grows sequentially so OS traps access to the
2197          * guard page and commits more pages when needed.
2198          */
2199         x86_test_reg_imm (code, sreg, ~0xFFF);
2200         br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2201
2202         br[2] = code; /* loop */
2203         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
2204         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
2205         x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
2206         x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
2207         br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
2208         x86_patch (br[3], br[2]);
2209         x86_test_reg_reg (code, sreg, sreg);
2210         br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
2211         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2212
2213         br[1] = code; x86_jump8 (code, 0);
2214
2215         x86_patch (br[0], code);
2216         x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
2217         x86_patch (br[1], code);
2218         x86_patch (br[4], code);
2219 #else /* PLATFORM_WIN32 */
2220         x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
2221 #endif
2222         if (tree->flags & MONO_INST_INIT) {
2223                 int offset = 0;
2224                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
2225                         x86_push_reg (code, X86_EAX);
2226                         offset += 4;
2227                 }
2228                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
2229                         x86_push_reg (code, X86_ECX);
2230                         offset += 4;
2231                 }
2232                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
2233                         x86_push_reg (code, X86_EDI);
2234                         offset += 4;
2235                 }
2236                 
2237                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
2238                 if (sreg != X86_ECX)
2239                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
2240                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
2241                                 
2242                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
2243                 x86_cld (code);
2244                 x86_prefix (code, X86_REP_PREFIX);
2245                 x86_stosl (code);
2246                 
2247                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
2248                         x86_pop_reg (code, X86_EDI);
2249                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
2250                         x86_pop_reg (code, X86_ECX);
2251                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
2252                         x86_pop_reg (code, X86_EAX);
2253         }
2254         return code;
2255 }
2256
2257
2258 static guint8*
2259 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
2260 {
2261         /* Move return value to the target register */
2262         switch (ins->opcode) {
2263         case CEE_CALL:
2264         case OP_CALL_REG:
2265         case OP_CALL_MEMBASE:
2266                 if (ins->dreg != X86_EAX)
2267                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2268                 break;
2269         default:
2270                 break;
2271         }
2272
2273         return code;
2274 }
2275
/*
 * REAL_PRINT_REG:
 * @text: a string literal, it is pasted in front of " %d %p\n"
 * @reg: the number of the register to print
 *
 * Debugging helper: emits code which calls printf with @text, the number
 * of @reg and the contents of @reg. EAX, EDX and ECX are saved before the
 * call and restored after it. A variable named code must be in scope at
 * the point of use.
 *
 * The macro expands to several statements, so it must not be used as the
 * unbraced body of an if/else/loop.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert ((reg) >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, (reg)); \
x86_push_imm (code, (reg)); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
2290
/*
 * Alignment in bytes of the first instruction of a loop. The padding is
 * computed using (align - 1) as a mask, so the value has to be a power
 * of two.
 * TODO: benchmark and set based on cpu.
 */
#define LOOP_ALIGNMENT 8
/* true if bb has loop_body_start set and a non zero nesting */
#define bb_is_loop_start(bb) (((bb)->loop_body_start != 0) && ((bb)->nesting != 0))
2294
2295 void
2296 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2297 {
2298         MonoInst *ins;
2299         MonoCallInst *call;
2300         guint offset;
2301         guint8 *code = cfg->native_code + cfg->code_len;
2302         MonoInst *last_ins = NULL;
2303         guint last_offset = 0;
2304         int max_len, cpos;
2305
2306         if (cfg->opt & MONO_OPT_PEEPHOLE)
2307                 peephole_pass (cfg, bb);
2308
2309         if (cfg->opt & MONO_OPT_LOOP) {
2310                 int pad, align = LOOP_ALIGNMENT;
2311                 /* set alignment depending on cpu */
2312                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
2313                         pad = align - pad;
2314                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
2315                         x86_padding (code, pad);
2316                         cfg->code_len += pad;
2317                         bb->native_offset = cfg->code_len;
2318                 }
2319         }
2320
2321         if (cfg->verbose_level > 2)
2322                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2323
2324         cpos = bb->max_offset;
2325
2326         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2327                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
2328                 g_assert (!cfg->compile_aot);
2329                 cpos += 6;
2330
2331                 cov->data [bb->dfn].cil_code = bb->cil_code;
2332                 /* this is not thread safe, but good enough */
2333                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
2334         }
2335
2336         offset = code - cfg->native_code;
2337
2338         ins = bb->code;
2339         while (ins) {
2340                 offset = code - cfg->native_code;
2341
2342                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
2343
2344                 if (offset > (cfg->code_size - max_len - 16)) {
2345                         cfg->code_size *= 2;
2346                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2347                         code = cfg->native_code + offset;
2348                         mono_jit_stats.code_reallocs++;
2349                 }
2350
2351                 mono_debug_record_line_number (cfg, ins, offset);
2352
2353                 switch (ins->opcode) {
2354                 case OP_BIGMUL:
2355                         x86_mul_reg (code, ins->sreg2, TRUE);
2356                         break;
2357                 case OP_BIGMUL_UN:
2358                         x86_mul_reg (code, ins->sreg2, FALSE);
2359                         break;
2360                 case OP_X86_SETEQ_MEMBASE:
2361                 case OP_X86_SETNE_MEMBASE:
2362                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2363                                          ins->inst_basereg, ins->inst_offset, TRUE);
2364                         break;
2365                 case OP_STOREI1_MEMBASE_IMM:
2366                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2367                         break;
2368                 case OP_STOREI2_MEMBASE_IMM:
2369                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2370                         break;
2371                 case OP_STORE_MEMBASE_IMM:
2372                 case OP_STOREI4_MEMBASE_IMM:
2373                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2374                         break;
2375                 case OP_STOREI1_MEMBASE_REG:
2376                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2377                         break;
2378                 case OP_STOREI2_MEMBASE_REG:
2379                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2380                         break;
2381                 case OP_STORE_MEMBASE_REG:
2382                 case OP_STOREI4_MEMBASE_REG:
2383                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2384                         break;
2385                 case CEE_LDIND_I:
2386                 case CEE_LDIND_I4:
2387                 case CEE_LDIND_U4:
2388                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
2389                         break;
2390                 case OP_LOADU4_MEM:
2391                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
2392                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
2393                         break;
2394                 case OP_LOAD_MEMBASE:
2395                 case OP_LOADI4_MEMBASE:
2396                 case OP_LOADU4_MEMBASE:
2397                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2398                         break;
2399                 case OP_LOADU1_MEMBASE:
2400                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2401                         break;
2402                 case OP_LOADI1_MEMBASE:
2403                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2404                         break;
2405                 case OP_LOADU2_MEMBASE:
2406                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2407                         break;
2408                 case OP_LOADI2_MEMBASE:
2409                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2410                         break;
2411                 case CEE_CONV_I1:
2412                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2413                         break;
2414                 case CEE_CONV_I2:
2415                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2416                         break;
2417                 case CEE_CONV_U1:
2418                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2419                         break;
2420                 case CEE_CONV_U2:
2421                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2422                         break;
2423                 case OP_COMPARE:
2424                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2425                         break;
2426                 case OP_COMPARE_IMM:
2427                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2428                         break;
2429                 case OP_X86_COMPARE_MEMBASE_REG:
2430                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2431                         break;
2432                 case OP_X86_COMPARE_MEMBASE_IMM:
2433                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2434                         break;
2435                 case OP_X86_COMPARE_MEMBASE8_IMM:
2436                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2437                         break;
2438                 case OP_X86_COMPARE_REG_MEMBASE:
2439                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2440                         break;
2441                 case OP_X86_TEST_NULL:
2442                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2443                         break;
2444                 case OP_X86_ADD_MEMBASE_IMM:
2445                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2446                         break;
2447                 case OP_X86_ADD_MEMBASE:
2448                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2449                         break;
2450                 case OP_X86_SUB_MEMBASE_IMM:
2451                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2452                         break;
2453                 case OP_X86_SUB_MEMBASE:
2454                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2455                         break;
2456                 case OP_X86_INC_MEMBASE:
2457                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2458                         break;
2459                 case OP_X86_INC_REG:
2460                         x86_inc_reg (code, ins->dreg);
2461                         break;
2462                 case OP_X86_DEC_MEMBASE:
2463                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2464                         break;
2465                 case OP_X86_DEC_REG:
2466                         x86_dec_reg (code, ins->dreg);
2467                         break;
2468                 case OP_X86_MUL_MEMBASE:
2469                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2470                         break;
2471                 case CEE_BREAK:
2472                         x86_breakpoint (code);
2473                         break;
2474                 case OP_ADDCC:
2475                 case CEE_ADD:
2476                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2477                         break;
2478                 case OP_ADC:
2479                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2480                         break;
2481                 case OP_ADDCC_IMM:
2482                 case OP_ADD_IMM:
2483                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2484                         break;
2485                 case OP_ADC_IMM:
2486                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2487                         break;
2488                 case OP_SUBCC:
2489                 case CEE_SUB:
2490                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2491                         break;
2492                 case OP_SBB:
2493                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2494                         break;
2495                 case OP_SUBCC_IMM:
2496                 case OP_SUB_IMM:
2497                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2498                         break;
2499                 case OP_SBB_IMM:
2500                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2501                         break;
2502                 case CEE_AND:
2503                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2504                         break;
2505                 case OP_AND_IMM:
2506                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2507                         break;
2508                 case CEE_DIV:
2509                         x86_cdq (code);
2510                         x86_div_reg (code, ins->sreg2, TRUE);
2511                         break;
2512                 case CEE_DIV_UN:
2513                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2514                         x86_div_reg (code, ins->sreg2, FALSE);
2515                         break;
2516                 case OP_DIV_IMM:
2517                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2518                         x86_cdq (code);
2519                         x86_div_reg (code, ins->sreg2, TRUE);
2520                         break;
2521                 case CEE_REM:
2522                         x86_cdq (code);
2523                         x86_div_reg (code, ins->sreg2, TRUE);
2524                         break;
2525                 case CEE_REM_UN:
2526                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2527                         x86_div_reg (code, ins->sreg2, FALSE);
2528                         break;
2529                 case OP_REM_IMM:
2530                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2531                         x86_cdq (code);
2532                         x86_div_reg (code, ins->sreg2, TRUE);
2533                         break;
2534                 case CEE_OR:
2535                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2536                         break;
2537                 case OP_OR_IMM:
2538                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2539                         break;
2540                 case CEE_XOR:
2541                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2542                         break;
2543                 case OP_XOR_IMM:
2544                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2545                         break;
2546                 case CEE_SHL:
2547                         g_assert (ins->sreg2 == X86_ECX);
2548                         x86_shift_reg (code, X86_SHL, ins->dreg);
2549                         break;
2550                 case CEE_SHR:
2551                         g_assert (ins->sreg2 == X86_ECX);
2552                         x86_shift_reg (code, X86_SAR, ins->dreg);
2553                         break;
2554                 case OP_SHR_IMM:
2555                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2556                         break;
2557                 case OP_SHR_UN_IMM:
2558                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2559                         break;
2560                 case CEE_SHR_UN:
2561                         g_assert (ins->sreg2 == X86_ECX);
2562                         x86_shift_reg (code, X86_SHR, ins->dreg);
2563                         break;
2564                 case OP_SHL_IMM:
2565                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2566                         break;
2567                 case OP_LSHL: {
2568                         guint8 *jump_to_end;
2569
2570                         /* handle shifts below 32 bits */
2571                         x86_shld_reg (code, ins->unused, ins->sreg1);
2572                         x86_shift_reg (code, X86_SHL, ins->sreg1);
2573
2574                         x86_test_reg_imm (code, X86_ECX, 32);
2575                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2576
2577                         /* handle shift over 32 bit */
2578                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2579                         x86_clear_reg (code, ins->sreg1);
2580                         
2581                         x86_patch (jump_to_end, code);
2582                         }
2583                         break;
2584                 case OP_LSHR: {
2585                         guint8 *jump_to_end;
2586
2587                         /* handle shifts below 32 bits */
2588                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2589                         x86_shift_reg (code, X86_SAR, ins->unused);
2590
2591                         x86_test_reg_imm (code, X86_ECX, 32);
2592                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2593
2594                         /* handle shifts over 31 bits */
2595                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2596                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
2597                         
2598                         x86_patch (jump_to_end, code);
2599                         }
2600                         break;
2601                 case OP_LSHR_UN: {
2602                         guint8 *jump_to_end;
2603
2604                         /* 64-bit logical shift right by ECX (low word in sreg1, high word in ins->unused): shrd/shr only honour the low 5 bits of the count */
2605                         x86_shrd_reg (code, ins->sreg1, ins->unused);
2606                         x86_shift_reg (code, X86_SHR, ins->unused);
2607
2608                         x86_test_reg_imm (code, X86_ECX, 32);
2609                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2610
2611                         /* count >= 32: the shifted high word becomes the low word and the high word must be zero (a shr by 31 would keep bit 31, e.g. for a count of exactly 32) */
2612                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2613                         x86_clear_reg (code, ins->unused);
2614                         
2615                         x86_patch (jump_to_end, code);
2616                         }
2617                         break;
2618                 case OP_LSHL_IMM:
2619                         if (ins->inst_imm >= 32) {
2620                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
2621                                 x86_clear_reg (code, ins->sreg1);
2622                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
2623                         } else {
2624                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
2625                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2626                         }
2627                         break;
2628                 case OP_LSHR_IMM:
2629                         if (ins->inst_imm >= 32) {
2630                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
2631                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
2632                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2633                         } else {
2634                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2635                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
2636                         }
2637                         break;
2638                 case OP_LSHR_UN_IMM:
2639                         if (ins->inst_imm >= 32) {
2640                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2641                                 x86_clear_reg (code, ins->unused);
2642                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2643                         } else {
2644                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2645                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
2646                         }
2647                         break;
2648                 case CEE_NOT:
2649                         x86_not_reg (code, ins->sreg1);
2650                         break;
2651                 case CEE_NEG:
2652                         x86_neg_reg (code, ins->sreg1);
2653                         break;
2654                 case OP_SEXT_I1:
2655                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2656                         break;
2657                 case OP_SEXT_I2:
2658                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2659                         break;
2660                 case CEE_MUL:
2661                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2662                         break;
2663                 case OP_MUL_IMM:
2664                         x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2665                         break;
2666                 case CEE_MUL_OVF:
2667                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2668                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2669                         break;
2670                 case CEE_MUL_OVF_UN: {
2671                         /* the mul operation and the exception check should most likely be split */
2672                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2673                         /*g_assert (ins->sreg2 == X86_EAX);
2674                         g_assert (ins->dreg == X86_EAX);*/
2675                         if (ins->sreg2 == X86_EAX) {
2676                                 non_eax_reg = ins->sreg1;
2677                         } else if (ins->sreg1 == X86_EAX) {
2678                                 non_eax_reg = ins->sreg2;
2679                         } else {
2680                                 /* no need to save since we're going to store to it anyway */
2681                                 if (ins->dreg != X86_EAX) {
2682                                         saved_eax = TRUE;
2683                                         x86_push_reg (code, X86_EAX);
2684                                 }
2685                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2686                                 non_eax_reg = ins->sreg2;
2687                         }
2688                         if (ins->dreg == X86_EDX) {
2689                                 if (!saved_eax) {
2690                                         saved_eax = TRUE;
2691                                         x86_push_reg (code, X86_EAX);
2692                                 }
2693                         } else if (ins->dreg != X86_EAX) {
2694                                 saved_edx = TRUE;
2695                                 x86_push_reg (code, X86_EDX);
2696                         }
2697                         x86_mul_reg (code, non_eax_reg, FALSE);
2698                         /* save before the check since pop and mov don't change the flags */
2699                         if (ins->dreg != X86_EAX)
2700                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2701                         if (saved_edx)
2702                                 x86_pop_reg (code, X86_EDX);
2703                         if (saved_eax)
2704                                 x86_pop_reg (code, X86_EAX);
2705                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2706                         break;
2707                 }
2708                 case OP_ICONST:
2709                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2710                         break;
2711                 case OP_AOTCONST:
2712                         g_assert_not_reached ();
2713                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2714                         x86_mov_reg_imm (code, ins->dreg, 0);
2715                         break;
2716                 case OP_LOAD_GOTADDR:
2717                         x86_call_imm (code, 0);
2718                         /* 
2719                          * The patch needs to point to the pop, since the GOT offset needs 
2720                          * to be added to that address.
2721                          */
2722                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2723                         x86_pop_reg (code, ins->dreg);
2724                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2725                         break;
2726                 case OP_GOT_ENTRY:
2727                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2728                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2729                         break;
2730                 case OP_X86_PUSH_GOT_ENTRY:
2731                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2732                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2733                         break;
2734                 case CEE_CONV_I4:
2735                 case OP_MOVE:
2736                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2737                         break;
2738                 case CEE_CONV_U4:
2739                         g_assert_not_reached ();
2740                 case CEE_JMP: {
2741                         /*
2742                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2743                          * Keep in sync with the code in emit_epilog.
2744                          */
2745                         int pos = 0;
2746
2747                         /* FIXME: no tracing support... */
2748                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2749                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2750                         /* reset offset to make max_len work */
2751                         offset = code - cfg->native_code;
2752
2753                         g_assert (!cfg->method->save_lmf);
2754
2755                         if (cfg->used_int_regs & (1 << X86_EBX))
2756                                 pos -= 4;
2757                         if (cfg->used_int_regs & (1 << X86_EDI))
2758                                 pos -= 4;
2759                         if (cfg->used_int_regs & (1 << X86_ESI))
2760                                 pos -= 4;
2761                         if (pos)
2762                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2763         
2764                         if (cfg->used_int_regs & (1 << X86_ESI))
2765                                 x86_pop_reg (code, X86_ESI);
2766                         if (cfg->used_int_regs & (1 << X86_EDI))
2767                                 x86_pop_reg (code, X86_EDI);
2768                         if (cfg->used_int_regs & (1 << X86_EBX))
2769                                 x86_pop_reg (code, X86_EBX);
2770         
2771                         /* restore ESP/EBP */
2772                         x86_leave (code);
2773                         offset = code - cfg->native_code;
2774                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2775                         x86_jump32 (code, 0);
2776                         break;
2777                 }
2778                 case OP_CHECK_THIS:
2779                         /* ensure ins->sreg1 is not NULL
2780                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2781                          * cmp DWORD PTR [eax], 0
2782                          */
2783                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2784                         break;
2785                 case OP_ARGLIST: {
2786                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2787                         x86_push_reg (code, hreg);
2788                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2789                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2790                         x86_pop_reg (code, hreg);
2791                         break;
2792                 }
2793                 case OP_FCALL:
2794                 case OP_LCALL:
2795                 case OP_VCALL:
2796                 case OP_VOIDCALL:
2797                 case CEE_CALL:
2798                         call = (MonoCallInst*)ins;
2799                         if (ins->flags & MONO_INST_HAS_METHOD)
2800                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2801                         else
2802                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2803                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention)) {
2804                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2805                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2806                                  * for P4 or i686 because gcc will avoid using pop push at all). But we aren't
2807                                  * smart enough to do that optimization yet.
2808                                  *
2809                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2810                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2811                                  * speedup (most likely from locality benefits). People with other processors should
2812                                  * check on theirs to see what happens.
2813                                  */
2814                                 if (call->stack_usage == 4) {
2815                                         /* we want to use registers that won't get used soon, so use
2816                                          * ecx, as eax will get allocated first. edx is used by long calls,
2817                                          * so we can't use that.
2818                                          */
2819                                         
2820                                         x86_pop_reg (code, X86_ECX);
2821                                 } else {
2822                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2823                                 }
2824                         }
2825                         code = emit_move_return_value (cfg, ins, code);
2826                         break;
2827                 case OP_FCALL_REG:
2828                 case OP_LCALL_REG:
2829                 case OP_VCALL_REG:
2830                 case OP_VOIDCALL_REG:
2831                 case OP_CALL_REG:
2832                         call = (MonoCallInst*)ins;
2833                         x86_call_reg (code, ins->sreg1);
2834                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention)) {
2835                                 if (call->stack_usage == 4)
2836                                         x86_pop_reg (code, X86_ECX);
2837                                 else
2838                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2839                         }
2840                         code = emit_move_return_value (cfg, ins, code);
2841                         break;
2842                 case OP_FCALL_MEMBASE:
2843                 case OP_LCALL_MEMBASE:
2844                 case OP_VCALL_MEMBASE:
2845                 case OP_VOIDCALL_MEMBASE:
2846                 case OP_CALL_MEMBASE:
2847                         call = (MonoCallInst*)ins;
2848                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2849                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention)) {
2850                                 if (call->stack_usage == 4)
2851                                         x86_pop_reg (code, X86_ECX);
2852                                 else
2853                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2854                         }
2855                         code = emit_move_return_value (cfg, ins, code);
2856                         break;
2857                 case OP_OUTARG:
2858                 case OP_X86_PUSH:
2859                         x86_push_reg (code, ins->sreg1);
2860                         break;
2861                 case OP_X86_PUSH_IMM:
2862                         x86_push_imm (code, ins->inst_imm);
2863                         break;
2864                 case OP_X86_PUSH_MEMBASE:
2865                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2866                         break;
2867                 case OP_X86_PUSH_OBJ: 
2868                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2869                         x86_push_reg (code, X86_EDI);
2870                         x86_push_reg (code, X86_ESI);
2871                         x86_push_reg (code, X86_ECX);
2872                         if (ins->inst_offset)
2873                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2874                         else
2875                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2876                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2877                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2878                         x86_cld (code);
2879                         x86_prefix (code, X86_REP_PREFIX);
2880                         x86_movsd (code);
2881                         x86_pop_reg (code, X86_ECX);
2882                         x86_pop_reg (code, X86_ESI);
2883                         x86_pop_reg (code, X86_EDI);
2884                         break;
2885                 case OP_X86_LEA:
2886                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2887                         break;
2888                 case OP_X86_LEA_MEMBASE:
2889                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2890                         break;
2891                 case OP_X86_XCHG:
2892                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2893                         break;
2894                 case OP_LOCALLOC:
2895                         /* keep alignment */
2896                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2897                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2898                         code = mono_emit_stack_alloc (code, ins);
2899                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2900                         break;
2901                 case CEE_RET:
2902                         x86_ret (code);
2903                         break;
2904                 case CEE_THROW: {
2905                         x86_push_reg (code, ins->sreg1);
2906                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2907                                                           (gpointer)"mono_arch_throw_exception");
2908                         break;
2909                 }
2910                 case OP_RETHROW: {
2911                         x86_push_reg (code, ins->sreg1);
2912                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2913                                                           (gpointer)"mono_arch_rethrow_exception");
2914                         break;
2915                 }
2916                 case OP_CALL_HANDLER: 
2917                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2918                         x86_call_imm (code, 0);
2919                         break;
2920                 case OP_LABEL:
2921                         ins->inst_c0 = code - cfg->native_code;
2922                         break;
2923                 case CEE_BR:
2924                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2925                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2926                         //break;
2927                         if (ins->flags & MONO_INST_BRLABEL) {
2928                                 if (ins->inst_i0->inst_c0) {
2929                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2930                                 } else {
2931                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2932                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2933                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2934                                                 x86_jump8 (code, 0);
2935                                         else 
2936                                                 x86_jump32 (code, 0);
2937                                 }
2938                         } else {
2939                                 if (ins->inst_target_bb->native_offset) {
2940                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2941                                 } else {
2942                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2943                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2944                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2945                                                 x86_jump8 (code, 0);
2946                                         else 
2947                                                 x86_jump32 (code, 0);
2948                                 } 
2949                         }
2950                         break;
2951                 case OP_BR_REG:
2952                         x86_jump_reg (code, ins->sreg1);
2953                         break;
2954                 case OP_CEQ:
2955                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2956                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2957                         break;
2958                 case OP_CLT:
2959                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2960                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2961                         break;
2962                 case OP_CLT_UN:
2963                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2964                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2965                         break;
2966                 case OP_CGT:
2967                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2968                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2969                         break;
2970                 case OP_CGT_UN:
2971                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2972                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2973                         break;
2974                 case OP_CNE:
2975                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2976                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2977                         break;
2978                 case OP_COND_EXC_EQ:
2979                 case OP_COND_EXC_NE_UN:
2980                 case OP_COND_EXC_LT:
2981                 case OP_COND_EXC_LT_UN:
2982                 case OP_COND_EXC_GT:
2983                 case OP_COND_EXC_GT_UN:
2984                 case OP_COND_EXC_GE:
2985                 case OP_COND_EXC_GE_UN:
2986                 case OP_COND_EXC_LE:
2987                 case OP_COND_EXC_LE_UN:
2988                 case OP_COND_EXC_OV:
2989                 case OP_COND_EXC_NO:
2990                 case OP_COND_EXC_C:
2991                 case OP_COND_EXC_NC:
2992                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
2993                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2994                         break;
2995                 case CEE_BEQ:
2996                 case CEE_BNE_UN:
2997                 case CEE_BLT:
2998                 case CEE_BLT_UN:
2999                 case CEE_BGT:
3000                 case CEE_BGT_UN:
3001                 case CEE_BGE:
3002                 case CEE_BGE_UN:
3003                 case CEE_BLE:
3004                 case CEE_BLE_UN:
3005                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
3006                         break;
3007
3008                 /* floating point opcodes */
3009                 case OP_R8CONST: {
3010                         double d = *(double *)ins->inst_p0;
3011
3012                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
3013                                 x86_fldz (code);
3014                         } else if (d == 1.0) {
3015                                 x86_fld1 (code);
3016                         } else {
3017                                 if (cfg->compile_aot) {
3018                                         guint32 *val = (guint32*)&d;
3019                                         x86_push_imm (code, val [1]);
3020                                         x86_push_imm (code, val [0]);
3021                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
3022                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3023                                 }
3024                                 else {
3025                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
3026                                         x86_fld (code, NULL, TRUE);
3027                                 }
3028                         }
3029                         break;
3030                 }
3031                 case OP_R4CONST: {
3032                         float f = *(float *)ins->inst_p0;
3033
3034                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
3035                                 x86_fldz (code);
3036                         } else if (f == 1.0) {
3037                                 x86_fld1 (code);
3038                         } else {
3039                                 if (cfg->compile_aot) {
3040                                         guint32 val = *(guint32*)&f;
3041                                         x86_push_imm (code, val);
3042                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
3043                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3044                                 }
3045                                 else {
3046                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
3047                                         x86_fld (code, NULL, FALSE);
3048                                 }
3049                         }
3050                         break;
3051                 }
3052                 case OP_STORER8_MEMBASE_REG:
3053                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
3054                         break;
3055                 case OP_LOADR8_SPILL_MEMBASE:
3056                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3057                         x86_fxch (code, 1);
3058                         break;
3059                 case OP_LOADR8_MEMBASE:
3060                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3061                         break;
3062                 case OP_STORER4_MEMBASE_REG:
3063                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3064                         break;
3065                 case OP_LOADR4_MEMBASE:
3066                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3067                         break;
3068                 case CEE_CONV_R4: /* FIXME: change precision */
3069                 case CEE_CONV_R8:
3070                         x86_push_reg (code, ins->sreg1);
3071                         x86_fild_membase (code, X86_ESP, 0, FALSE);
3072                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3073                         break;
3074                 case OP_X86_FP_LOAD_I8:
3075                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3076                         break;
3077                 case OP_X86_FP_LOAD_I4:
3078                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3079                         break;
3080                 case OP_FCONV_TO_I1:
3081                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3082                         break;
3083                 case OP_FCONV_TO_U1:
3084                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3085                         break;
3086                 case OP_FCONV_TO_I2:
3087                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3088                         break;
3089                 case OP_FCONV_TO_U2:
3090                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3091                         break;
3092                 case OP_FCONV_TO_I4:
3093                 case OP_FCONV_TO_I:
3094                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3095                         break;
3096                 case OP_FCONV_TO_I8:
3097                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3098                         x86_fnstcw_membase(code, X86_ESP, 0);
3099                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3100                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3101                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3102                         x86_fldcw_membase (code, X86_ESP, 2);
3103                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3104                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3105                         x86_pop_reg (code, ins->dreg);
3106                         x86_pop_reg (code, ins->unused);
3107                         x86_fldcw_membase (code, X86_ESP, 0);
3108                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3109                         break;
3110                 case OP_LCONV_TO_R_UN: { 
3111                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
3112                         guint8 *br;
3113
3114                         /* load 64bit integer to FP stack */
3115                         x86_push_imm (code, 0);
3116                         x86_push_reg (code, ins->sreg2);
3117                         x86_push_reg (code, ins->sreg1);
3118                         x86_fild_membase (code, X86_ESP, 0, TRUE);
3119                         /* store as 80bit FP value */
3120                         x86_fst80_membase (code, X86_ESP, 0);
3121                         
3122                         /* test if lreg is negative */
3123                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3124                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3125         
3126                         /* add correction constant mn */
3127                         x86_fld80_mem (code, mn);
3128                         x86_fld80_membase (code, X86_ESP, 0);
3129                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3130                         x86_fst80_membase (code, X86_ESP, 0);
3131
3132                         x86_patch (br, code);
3133
3134                         x86_fld80_membase (code, X86_ESP, 0);
3135                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
3136
3137                         break;
3138                 }
3139                 case OP_LCONV_TO_OVF_I: {
3140                         guint8 *br [3], *label [1];
3141
3142                         /* 
3143                          * Valid ints: 0xffffffff:8000000 to 00000000:0x7f000000
3144                          */
3145                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3146
3147                         /* If the low word top bit is set, see if we are negative */
3148                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3149                         /* We are not negative (no top bit set, check for our top word to be zero */
3150                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3151                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3152                         label [0] = code;
3153
3154                         /* throw exception */
3155                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3156                         x86_jump32 (code, 0);
3157         
3158                         x86_patch (br [0], code);
3159                         /* our top bit is set, check that top word is 0xfffffff */
3160                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3161                 
3162                         x86_patch (br [1], code);
3163                         /* nope, emit exception */
3164                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3165                         x86_patch (br [2], label [0]);
3166
3167                         if (ins->dreg != ins->sreg1)
3168                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3169                         break;
3170                 }
3171                 case OP_FADD:
3172                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3173                         break;
3174                 case OP_FSUB:
3175                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3176                         break;          
3177                 case OP_FMUL:
3178                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3179                         break;          
3180                 case OP_FDIV:
3181                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3182                         break;          
3183                 case OP_FNEG:
3184                         x86_fchs (code);
3185                         break;          
3186                 case OP_SIN:
3187                         x86_fsin (code);
3188                         x86_fldz (code);
3189                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3190                         break;          
3191                 case OP_COS:
3192                         x86_fcos (code);
3193                         x86_fldz (code);
3194                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3195                         break;          
3196                 case OP_ABS:
3197                         x86_fabs (code);
3198                         break;          
3199                 case OP_TAN: {
3200                         /* 
3201                          * it really doesn't make sense to inline all this code,
3202                          * it's here just to show that things may not be as simple 
3203                          * as they appear.
3204                          */
3205                         guchar *check_pos, *end_tan, *pop_jump;
3206                         x86_push_reg (code, X86_EAX);
3207                         x86_fptan (code);
3208                         x86_fnstsw (code);
3209                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3210                         check_pos = code;
3211                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3212                         x86_fstp (code, 0); /* pop the 1.0 */
3213                         end_tan = code;
3214                         x86_jump8 (code, 0);
3215                         x86_fldpi (code);
3216                         x86_fp_op (code, X86_FADD, 0);
3217                         x86_fxch (code, 1);
3218                         x86_fprem1 (code);
3219                         x86_fstsw (code);
3220                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3221                         pop_jump = code;
3222                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
3223                         x86_fstp (code, 1);
3224                         x86_fptan (code);
3225                         x86_patch (pop_jump, code);
3226                         x86_fstp (code, 0); /* pop the 1.0 */
3227                         x86_patch (check_pos, code);
3228                         x86_patch (end_tan, code);
3229                         x86_fldz (code);
3230                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3231                         x86_pop_reg (code, X86_EAX);
3232                         break;
3233                 }
3234                 case OP_ATAN:
3235                         x86_fld1 (code);
3236                         x86_fpatan (code);
3237                         x86_fldz (code);
3238                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3239                         break;          
3240                 case OP_SQRT:
3241                         x86_fsqrt (code);
3242                         break;          
3243                 case OP_X86_FPOP:
3244                         x86_fstp (code, 0);
3245                         break;          
3246                 case OP_FREM: {
3247                         guint8 *l1, *l2;
3248
3249                         x86_push_reg (code, X86_EAX);
3250                         /* we need to exchange ST(0) with ST(1) */
3251                         x86_fxch (code, 1);
3252
3253                         /* this requires a loop, because fprem somtimes 
3254                          * returns a partial remainder */
3255                         l1 = code;
3256                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3257                         /* x86_fprem1 (code); */
3258                         x86_fprem (code);
3259                         x86_fnstsw (code);
3260                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3261                         l2 = code + 2;
3262                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3263
3264                         /* pop result */
3265                         x86_fstp (code, 1);
3266
3267                         x86_pop_reg (code, X86_EAX);
3268                         break;
3269                 }
3270                 case OP_FCOMPARE:
3271                         if (cfg->opt & MONO_OPT_FCMOV) {
3272                                 x86_fcomip (code, 1);
3273                                 x86_fstp (code, 0);
3274                                 break;
3275                         }
3276                         /* this overwrites EAX */
3277                         EMIT_FPCOMPARE(code);
3278                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3279                         break;
3280                 case OP_FCEQ:
3281                         if (cfg->opt & MONO_OPT_FCMOV) {
3282                                 /* zeroing the register at the start results in 
3283                                  * shorter and faster code (we can also remove the widening op)
3284                                  */
3285                                 guchar *unordered_check;
3286                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3287                                 x86_fcomip (code, 1);
3288                                 x86_fstp (code, 0);
3289                                 unordered_check = code;
3290                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3291                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3292                                 x86_patch (unordered_check, code);
3293                                 break;
3294                         }
3295                         if (ins->dreg != X86_EAX) 
3296                                 x86_push_reg (code, X86_EAX);
3297
3298                         EMIT_FPCOMPARE(code);
3299                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3300                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3301                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3302                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3303
3304                         if (ins->dreg != X86_EAX) 
3305                                 x86_pop_reg (code, X86_EAX);
3306                         break;
3307                 case OP_FCLT:
3308                 case OP_FCLT_UN:
3309                         if (cfg->opt & MONO_OPT_FCMOV) {
3310                                 /* zeroing the register at the start results in 
3311                                  * shorter and faster code (we can also remove the widening op)
3312                                  */
3313                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3314                                 x86_fcomip (code, 1);
3315                                 x86_fstp (code, 0);
3316                                 if (ins->opcode == OP_FCLT_UN) {
3317                                         guchar *unordered_check = code;
3318                                         guchar *jump_to_end;
3319                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3320                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3321                                         jump_to_end = code;
3322                                         x86_jump8 (code, 0);
3323                                         x86_patch (unordered_check, code);
3324                                         x86_inc_reg (code, ins->dreg);
3325                                         x86_patch (jump_to_end, code);
3326                                 } else {
3327                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3328                                 }
3329                                 break;
3330                         }
3331                         if (ins->dreg != X86_EAX) 
3332                                 x86_push_reg (code, X86_EAX);
3333
3334                         EMIT_FPCOMPARE(code);
3335                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3336                         if (ins->opcode == OP_FCLT_UN) {
3337                                 guchar *is_not_zero_check, *end_jump;
3338                                 is_not_zero_check = code;
3339                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3340                                 end_jump = code;
3341                                 x86_jump8 (code, 0);
3342                                 x86_patch (is_not_zero_check, code);
3343                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3344
3345                                 x86_patch (end_jump, code);
3346                         }
3347                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3348                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3349
3350                         if (ins->dreg != X86_EAX) 
3351                                 x86_pop_reg (code, X86_EAX);
3352                         break;
3353                 case OP_FCGT:
3354                 case OP_FCGT_UN:
3355                         if (cfg->opt & MONO_OPT_FCMOV) {
3356                                 /* zeroing the register at the start results in 
3357                                  * shorter and faster code (we can also remove the widening op)
3358                                  */
3359                                 guchar *unordered_check;
3360                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3361                                 x86_fcomip (code, 1);
3362                                 x86_fstp (code, 0);
3363                                 if (ins->opcode == OP_FCGT) {
3364                                         unordered_check = code;
3365                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
3366                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3367                                         x86_patch (unordered_check, code);
3368                                 } else {
3369                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3370                                 }
3371                                 break;
3372                         }
3373                         if (ins->dreg != X86_EAX) 
3374                                 x86_push_reg (code, X86_EAX);
3375
3376                         EMIT_FPCOMPARE(code);
3377                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3378                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3379                         if (ins->opcode == OP_FCGT_UN) {
3380                                 guchar *is_not_zero_check, *end_jump;
3381                                 is_not_zero_check = code;
3382                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3383                                 end_jump = code;
3384                                 x86_jump8 (code, 0);
3385                                 x86_patch (is_not_zero_check, code);
3386                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3387         
3388                                 x86_patch (end_jump, code);
3389                         }
3390                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3391                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3392
3393                         if (ins->dreg != X86_EAX) 
3394                                 x86_pop_reg (code, X86_EAX);
3395                         break;
3396                 case OP_FBEQ:
3397                         if (cfg->opt & MONO_OPT_FCMOV) {
3398                                 guchar *jump = code;
3399                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
3400                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3401                                 x86_patch (jump, code);
3402                                 break;
3403                         }
3404                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3405                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3406                         break;
3407                 case OP_FBNE_UN:
3408                         /* Branch if C013 != 100 */
3409                         if (cfg->opt & MONO_OPT_FCMOV) {
3410                                 /* branch if !ZF or (PF|CF) */
3411                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3412                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3413                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3414                                 break;
3415                         }
3416                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3417                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3418                         break;
3419                 case OP_FBLT:
3420                         if (cfg->opt & MONO_OPT_FCMOV) {
3421                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3422                                 break;
3423                         }
3424                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3425                         break;
3426                 case OP_FBLT_UN:
3427                         if (cfg->opt & MONO_OPT_FCMOV) {
3428                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3429                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3430                                 break;
3431                         }
3432                         if (ins->opcode == OP_FBLT_UN) {
3433                                 guchar *is_not_zero_check, *end_jump;
3434                                 is_not_zero_check = code;
3435                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3436                                 end_jump = code;
3437                                 x86_jump8 (code, 0);
3438                                 x86_patch (is_not_zero_check, code);
3439                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3440
3441                                 x86_patch (end_jump, code);
3442                         }
3443                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3444                         break;
3445                 case OP_FBGT:
3446                 case OP_FBGT_UN:
3447                         if (cfg->opt & MONO_OPT_FCMOV) {
3448                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3449                                 break;
3450                         }
3451                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3452                         if (ins->opcode == OP_FBGT_UN) {
3453                                 guchar *is_not_zero_check, *end_jump;
3454                                 is_not_zero_check = code;
3455                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3456                                 end_jump = code;
3457                                 x86_jump8 (code, 0);
3458                                 x86_patch (is_not_zero_check, code);
3459                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3460
3461                                 x86_patch (end_jump, code);
3462                         }
3463                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3464                         break;
3465                 case OP_FBGE:
3466                         /* Branch if C013 == 100 or 001 */
3467                         if (cfg->opt & MONO_OPT_FCMOV) {
3468                                 guchar *br1;
3469
3470                                 /* skip branch if C1=1 */
3471                                 br1 = code;
3472                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3473                                 /* branch if (C0 | C3) = 1 */
3474                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3475                                 x86_patch (br1, code);
3476                                 break;
3477                         }
3478                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3479                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3480                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3481                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3482                         break;
3483                 case OP_FBGE_UN:
3484                         /* Branch if C013 == 000 */
3485                         if (cfg->opt & MONO_OPT_FCMOV) {
3486                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3487                                 break;
3488                         }
3489                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3490                         break;
3491                 case OP_FBLE:
3492                         /* Branch if C013=000 or 100 */
3493                         if (cfg->opt & MONO_OPT_FCMOV) {
3494                                 guchar *br1;
3495
3496                                 /* skip branch if C1=1 */
3497                                 br1 = code;
3498                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
3499                                 /* branch if C0=0 */
3500                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3501                                 x86_patch (br1, code);
3502                                 break;
3503                         }
3504                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3505                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3506                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3507                         break;
3508                 case OP_FBLE_UN:
3509                         /* Branch if C013 != 001 */
3510                         if (cfg->opt & MONO_OPT_FCMOV) {
3511                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3512                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3513                                 break;
3514                         }
3515                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3516                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3517                         break;
3518                 case CEE_CKFINITE: {
3519                         x86_push_reg (code, X86_EAX);
3520                         x86_fxam (code);
3521                         x86_fnstsw (code);
3522                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3523                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3524                         x86_pop_reg (code, X86_EAX);
3525                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3526                         break;
3527                 }
3528                 case OP_TLS_GET: {
3529                         x86_prefix (code, X86_GS_PREFIX);
3530                         x86_mov_reg_mem (code, ins->dreg, ins->inst_offset, 4);                 
3531                         break;
3532                 }
3533                 case OP_ATOMIC_ADD_I4: {
3534                         int dreg = ins->dreg;
3535
3536                         if (dreg == ins->inst_basereg) {
3537                                 x86_push_reg (code, ins->sreg2);
3538                                 dreg = ins->sreg2;
3539                         } 
3540                         
3541                         if (dreg != ins->sreg2)
3542                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3543
3544                         x86_prefix (code, X86_LOCK_PREFIX);
3545                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3546
3547                         if (dreg != ins->dreg) {
3548                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3549                                 x86_pop_reg (code, dreg);
3550                         }
3551
3552                         break;
3553                 }
3554                 case OP_ATOMIC_ADD_NEW_I4: {
3555                         int dreg = ins->dreg;
3556
3557                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3558                         if (ins->sreg2 == dreg) {
3559                                 if (dreg == X86_EBX) {
3560                                         dreg = X86_EDI;
3561                                         if (ins->inst_basereg == X86_EDI)
3562                                                 dreg = X86_ESI;
3563                                 } else {
3564                                         dreg = X86_EBX;
3565                                         if (ins->inst_basereg == X86_EBX)
3566                                                 dreg = X86_EDI;
3567                                 }
3568                         } else if (ins->inst_basereg == dreg) {
3569                                 if (dreg == X86_EBX) {
3570                                         dreg = X86_EDI;
3571                                         if (ins->sreg2 == X86_EDI)
3572                                                 dreg = X86_ESI;
3573                                 } else {
3574                                         dreg = X86_EBX;
3575                                         if (ins->sreg2 == X86_EBX)
3576                                                 dreg = X86_EDI;
3577                                 }
3578                         }
3579
3580                         if (dreg != ins->dreg) {
3581                                 x86_push_reg (code, dreg);
3582                                 x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3583                         }
3584
3585                         x86_prefix (code, X86_LOCK_PREFIX);
3586                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3587                         /* dreg contains the old value, add with sreg2 value */
3588                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3589                         
3590                         if (ins->dreg != dreg) {
3591                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3592                                 x86_pop_reg (code, dreg);
3593                         }
3594
3595                         break;
3596                 }
3597                 case OP_ATOMIC_EXCHANGE_I4: {
3598                         guchar *br[2];
3599                         int sreg2 = ins->sreg2;
3600                         int breg = ins->inst_basereg;
3601
3602                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3603                          * hack to overcome limits in x86 reg allocator 
3604                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3605                          */
3606                         if (ins->dreg != X86_EAX)
3607                                 x86_push_reg (code, X86_EAX);
3608                         
3609                         /* We need the EAX reg for the cmpxchg */
3610                         if (ins->sreg2 == X86_EAX) {
3611                                 x86_push_reg (code, X86_EDX);
3612                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3613                                 sreg2 = X86_EDX;
3614                         }
3615
3616                         if (breg == X86_EAX) {
3617                                 x86_push_reg (code, X86_ESI);
3618                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3619                                 breg = X86_ESI;
3620                         }
3621
3622                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3623
3624                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3625                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3626                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3627                         x86_patch (br [1], br [0]);
3628
3629                         if (breg != ins->inst_basereg)
3630                                 x86_pop_reg (code, X86_ESI);
3631
3632                         if (ins->dreg != X86_EAX) {
3633                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3634                                 x86_pop_reg (code, X86_EAX);
3635                         }
3636
3637                         if (ins->sreg2 != sreg2)
3638                                 x86_pop_reg (code, X86_EDX);
3639
3640                         break;
3641                 }
3642                 default:
3643                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3644                         g_assert_not_reached ();
3645                 }
3646
3647                 if ((code - cfg->native_code - offset) > max_len) {
3648                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3649                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3650                         g_assert_not_reached ();
3651                 }
3652                
3653                 cpos += max_len;
3654
3655                 last_ins = ins;
3656                 last_offset = offset;
3657                 
3658                 ins = ins->next;
3659         }
3660
3661         cfg->code_len = code - cfg->native_code;
3662 }
3663
/*
 * mono_arch_register_lowlevel_calls:
 *
 * Architecture hook with an empty body: this backend performs no work
 * here.
 */
void
mono_arch_register_lowlevel_calls (void)
{
        /* intentionally empty */
}
3668
3669 void
3670 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3671 {
3672         MonoJumpInfo *patch_info;
3673         gboolean compile_aot = !run_cctors;
3674
3675         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3676                 unsigned char *ip = patch_info->ip.i + code;
3677                 const unsigned char *target;
3678
3679                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3680
3681                 if (compile_aot) {
3682                         switch (patch_info->type) {
3683                         case MONO_PATCH_INFO_BB:
3684                         case MONO_PATCH_INFO_LABEL:
3685                                 break;
3686                         default:
3687                                 /* No need to patch these */
3688                                 continue;
3689                         }
3690                 }
3691
3692                 switch (patch_info->type) {
3693                 case MONO_PATCH_INFO_IP:
3694                         *((gconstpointer *)(ip)) = target;
3695                         break;
3696                 case MONO_PATCH_INFO_CLASS_INIT: {
3697                         guint8 *code = ip;
3698                         /* Might already been changed to a nop */
3699                         x86_call_code (code, 0);
3700                         x86_patch (ip, target);
3701                         break;
3702                 }
3703                 case MONO_PATCH_INFO_ABS:
3704                 case MONO_PATCH_INFO_METHOD:
3705                 case MONO_PATCH_INFO_METHOD_JUMP:
3706                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3707                 case MONO_PATCH_INFO_BB:
3708                 case MONO_PATCH_INFO_LABEL:
3709                         x86_patch (ip, target);
3710                         break;
3711                 case MONO_PATCH_INFO_NONE:
3712                         break;
3713                 default: {
3714                         guint32 offset = mono_arch_get_patch_offset (ip);
3715                         *((gconstpointer *)(ip + offset)) = target;
3716                         break;
3717                 }
3718                 }
3719         }
3720 }
3721
3722 guint8 *
3723 mono_arch_emit_prolog (MonoCompile *cfg)
3724 {
3725         MonoMethod *method = cfg->method;
3726         MonoBasicBlock *bb;
3727         MonoMethodSignature *sig;
3728         MonoInst *inst;
3729         int alloc_size, pos, max_offset, i;
3730         guint8 *code;
3731
3732         cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
3733         code = cfg->native_code = g_malloc (cfg->code_size);
3734
3735         x86_push_reg (code, X86_EBP);
3736         x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
3737
3738         alloc_size = - cfg->stack_offset;
3739         pos = 0;
3740
3741         if (method->save_lmf) {
3742                 pos += sizeof (MonoLMF);
3743
3744                 /* save the current IP */
3745                 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
3746                 x86_push_imm_template (code);
3747
3748                 /* save all caller saved regs */
3749                 x86_push_reg (code, X86_EBP);
3750                 x86_push_reg (code, X86_ESI);
3751                 x86_push_reg (code, X86_EDI);
3752                 x86_push_reg (code, X86_EBX);
3753
3754                 /* save method info */
3755                 x86_push_imm (code, method);
3756
3757                 /* get the address of lmf for the current thread */
3758                 /* 
3759                  * This is performance critical so we try to use some tricks to make
3760                  * it fast.
3761                  */
3762                 if (lmf_tls_offset != -1) {
3763                         /* Load lmf quicky using the GS register */
3764                         x86_prefix (code, X86_GS_PREFIX);
3765                         x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
3766                 }
3767                 else {
3768                         if (cfg->compile_aot) {
3769                                 /* The GOT var does not exist yet */
3770                                 x86_call_imm (code, 0);
3771                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
3772                                 x86_pop_reg (code, X86_EAX);
3773                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
3774                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3775                                 x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
3776                         }
3777                         else
3778                                 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
3779                 }
3780
3781                 /* push lmf */
3782                 x86_push_reg (code, X86_EAX); 
3783                 /* push *lfm (previous_lmf) */
3784                 x86_push_membase (code, X86_EAX, 0);
3785                 /* *(lmf) = ESP */
3786                 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
3787         } else {
3788
3789                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3790                         x86_push_reg (code, X86_EBX);
3791                         pos += 4;
3792                 }
3793
3794                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3795                         x86_push_reg (code, X86_EDI);
3796                         pos += 4;
3797                 }
3798
3799                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3800                         x86_push_reg (code, X86_ESI);
3801                         pos += 4;
3802                 }
3803         }
3804
3805         alloc_size -= pos;
3806
3807         if (alloc_size) {
3808                 /* See mono_emit_stack_alloc */
3809 #ifdef PLATFORM_WIN32
3810                 guint32 remaining_size = alloc_size;
3811                 while (remaining_size >= 0x1000) {
3812                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
3813                         x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
3814                         remaining_size -= 0x1000;
3815                 }
3816                 if (remaining_size)
3817                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
3818 #else
3819                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
3820 #endif
3821         }
3822
3823         /* compute max_offset in order to use short forward jumps */
3824         max_offset = 0;
3825         if (cfg->opt & MONO_OPT_BRANCH) {
3826                 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
3827                         MonoInst *ins = bb->code;
3828                         bb->max_offset = max_offset;
3829
3830                         if (cfg->prof_options & MONO_PROFILE_COVERAGE)
3831                                 max_offset += 6;
3832                         /* max alignment for loops */
3833                         if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
3834                                 max_offset += LOOP_ALIGNMENT;
3835
3836                         while (ins) {
3837                                 if (ins->opcode == OP_LABEL)
3838                                         ins->inst_c1 = max_offset;
3839                                 
3840                                 max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
3841                                 ins = ins->next;
3842                         }
3843                 }
3844         }
3845
3846         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3847                 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
3848
3849         /* load arguments allocated to register from the stack */
3850         sig = method->signature;
3851         pos = 0;
3852
3853         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3854                 inst = cfg->varinfo [pos];
3855                 if (inst->opcode == OP_REGVAR) {
3856                         x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
3857                         if (cfg->verbose_level > 2)
3858                                 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
3859                 }
3860                 pos++;
3861         }
3862
3863         cfg->code_len = code - cfg->native_code;
3864
3865         return code;
3866 }
3867
3868 void
3869 mono_arch_emit_epilog (MonoCompile *cfg)
3870 {
3871         MonoMethod *method = cfg->method;
3872         MonoMethodSignature *sig = method->signature;
3873         int pos;
3874         guint32 stack_to_pop;
3875         guint8 *code;
3876         int max_epilog_size = 16;
3877         
3878         if (cfg->method->save_lmf)
3879                 max_epilog_size += 128;
3880         
3881         if (mono_jit_trace_calls != NULL)
3882                 max_epilog_size += 50;
3883
3884         while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
3885                 cfg->code_size *= 2;
3886                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
3887                 mono_jit_stats.code_reallocs++;
3888         }
3889
3890         code = cfg->native_code + cfg->code_len;
3891
3892         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3893                 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
3894
3895         /* the code restoring the registers must be kept in sync with CEE_JMP */
3896         pos = 0;
3897         
3898         if (method->save_lmf) {
3899                 gint32 prev_lmf_reg;
3900
3901                 /* Find a spare register */
3902                 switch (sig->ret->type) {
3903                 case MONO_TYPE_I8:
3904                 case MONO_TYPE_U8:
3905                         prev_lmf_reg = X86_EDI;
3906                         cfg->used_int_regs |= (1 << X86_EDI);
3907                         break;
3908                 default:
3909                         prev_lmf_reg = X86_EDX;
3910                         break;
3911                 }
3912
3913                 /* reg = previous_lmf */
3914                 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);
3915
3916                 /* ecx = lmf */
3917                 x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);
3918
3919                 /* *(lmf) = previous_lmf */
3920                 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
3921
3922                 /* restore caller saved regs */
3923                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3924                         x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
3925                 }
3926
3927                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3928                         x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
3929                 }
3930                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3931                         x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
3932                 }
3933
3934                 /* EBP is restored by LEAVE */
3935         } else {
3936                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3937                         pos -= 4;
3938                 }
3939                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3940                         pos -= 4;
3941                 }
3942                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3943                         pos -= 4;
3944                 }
3945
3946                 if (pos)
3947                         x86_lea_membase (code, X86_ESP, X86_EBP, pos);
3948
3949                 if (cfg->used_int_regs & (1 << X86_ESI)) {
3950                         x86_pop_reg (code, X86_ESI);
3951                 }
3952                 if (cfg->used_int_regs & (1 << X86_EDI)) {
3953                         x86_pop_reg (code, X86_EDI);
3954                 }
3955                 if (cfg->used_int_regs & (1 << X86_EBX)) {
3956                         x86_pop_reg (code, X86_EBX);
3957                 }
3958         }
3959
3960         x86_leave (code);
3961
3962         if (CALLCONV_IS_STDCALL (sig->call_convention)) {
3963                 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
3964
3965                 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
3966         } else if (MONO_TYPE_ISSTRUCT (cfg->method->signature->ret))
3967                 stack_to_pop = 4;
3968         else
3969                 stack_to_pop = 0;
3970
3971         if (stack_to_pop)
3972                 x86_ret_imm (code, stack_to_pop);
3973         else
3974                 x86_ret (code);
3975
3976         cfg->code_len = code - cfg->native_code;
3977
3978         g_assert (cfg->code_len < cfg->code_size);
3979
3980 }
3981
3982 void
3983 mono_arch_emit_exceptions (MonoCompile *cfg)
3984 {
3985         MonoJumpInfo *patch_info;
3986         int nthrows, i;
3987         guint8 *code;
3988         MonoClass *exc_classes [16];
3989         guint8 *exc_throw_start [16], *exc_throw_end [16];
3990         guint32 code_size;
3991         int exc_count = 0;
3992
3993         /* Compute needed space */
3994         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
3995                 if (patch_info->type == MONO_PATCH_INFO_EXC)
3996                         exc_count++;
3997         }
3998
3999         /* 
4000          * make sure we have enough space for exceptions
4001          * 16 is the size of two push_imm instructions and a call
4002          */
4003         if (cfg->compile_aot)
4004                 code_size = exc_count * 32;
4005         else
4006                 code_size = exc_count * 16;
4007
4008         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4009                 cfg->code_size *= 2;
4010                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4011                 mono_jit_stats.code_reallocs++;
4012         }
4013
4014         code = cfg->native_code + cfg->code_len;
4015
4016         nthrows = 0;
4017         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4018                 switch (patch_info->type) {
4019                 case MONO_PATCH_INFO_EXC: {
4020                         MonoClass *exc_class;
4021                         guint8 *buf, *buf2;
4022                         guint32 throw_ip;
4023
4024                         x86_patch (patch_info->ip.i + cfg->native_code, code);
4025
4026                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4027                         g_assert (exc_class);
4028                         throw_ip = patch_info->ip.i;
4029
4030                         /* Find a throw sequence for the same exception class */
4031                         for (i = 0; i < nthrows; ++i)
4032                                 if (exc_classes [i] == exc_class)
4033                                         break;
4034                         if (i < nthrows) {
4035                                 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4036                                 x86_jump_code (code, exc_throw_start [i]);
4037                                 patch_info->type = MONO_PATCH_INFO_NONE;
4038                         }
4039                         else {
4040                                 guint32 got_reg = X86_EAX;
4041                                 guint32 size;
4042
4043                                 /* Compute size of code following the push <OFFSET> */
4044                                 if (cfg->compile_aot) {
4045                                         size = 5 + 6;
4046                                         if (!cfg->got_var)
4047                                                 size += 32;
4048                                         else if (cfg->got_var->opcode == OP_REGOFFSET)
4049                                                 size += 6;
4050                                 }
4051                                 else
4052                                         size = 5 + 5;
4053
4054                                 if ((code - cfg->native_code) - throw_ip < 127 - size) {
4055                                         /* Use the shorter form */
4056                                         buf = buf2 = code;
4057                                         x86_push_imm (code, 0);
4058                                 }
4059                                 else {
4060                                         buf = code;
4061                                         x86_push_imm (code, 0xf0f0f0f0);
4062                                         buf2 = code;
4063                                 }
4064
4065                                 if (nthrows < 16) {
4066                                         exc_classes [nthrows] = exc_class;
4067                                         exc_throw_start [nthrows] = code;
4068                                 }
4069
4070                                 if (cfg->compile_aot) {          
4071                                         /*
4072                                          * Since the patches are generated by the back end, there is                                     * no way to generate a got_var at this point.   
4073                                          */
4074                                         if (!cfg->got_var) {
4075                                                 x86_call_imm (code, 0);
4076                                                 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
4077                                                 x86_pop_reg (code, X86_EAX);
4078                                                 x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
4079                                         }
4080                                         else {
4081                                                 if (cfg->got_var->opcode == OP_REGOFFSET)
4082                                                         x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
4083                                                 else
4084                                                         got_reg = cfg->got_var->dreg;
4085                                         }
4086                                 }
4087
4088                                 x86_push_imm (code, exc_class->type_token);
4089                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
4090                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4091                                 patch_info->ip.i = code - cfg->native_code;
4092                                 if (cfg->compile_aot)
4093                                         x86_call_membase (code, got_reg, 0xf0f0f0f0);
4094                                 else
4095                                         x86_call_code (code, 0);
4096                                 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
4097                                 while (buf < buf2)
4098                                         x86_nop (buf);
4099
4100                                 if (nthrows < 16) {
4101                                         exc_throw_end [nthrows] = code;
4102                                         nthrows ++;
4103                                 }
4104                         }
4105                         break;
4106                 }
4107                 default:
4108                         /* do nothing */
4109                         break;
4110                 }
4111         }
4112
4113         cfg->code_len = code - cfg->native_code;
4114
4115         g_assert (cfg->code_len < cfg->code_size);
4116 }
4117
4118 void
4119 mono_arch_flush_icache (guint8 *code, gint size)
4120 {
4121         /* not needed */
4122 }
4123
/*
 * mono_arch_flush_register_windows:
 *
 * Empty on this backend: there is nothing to flush.
 */
void
mono_arch_flush_register_windows (void)
{
	return;
}
4128
4129 /*
4130  * Support for fast access to the thread-local lmf structure using the GS
4131  * segment register on NPTL + kernel 2.6.x.
4132  */
4133
/*
 * Set by mono_arch_setup_jit_tls_data on its first call, so that the TLS
 * offsets are probed at most once.
 */
static gboolean tls_offset_inited = FALSE;
4135
/*
 * tls_read_disp32:
 *
 * Reads the int stored at @p, which is not required to be aligned.
 * memcpy is used because @p points into the middle of an instruction.
 */
static int tls_read_disp32 (const unsigned char *p)
{
	int value;

	memcpy (&value, p, sizeof (value));
	return value;
}

/*
 * read_tls_offset_from_method:
 * @method: address of a function whose body should simply be
 *          return <tls var>;
 *
 * Determines the offset of the variable inside the TLS structures by
 * matching the first bytes of @method against the instruction sequences
 * listed below.
 *
 * Returns: the offset encoded in the matched sequence, or -1 when the code
 * does not start with any of the known sequences.
 */
static int read_tls_offset_from_method (void* method)
{
	const unsigned char *code = (const unsigned char *) method;

	/* gcc-3.3.2
	 *
	 * push ebp
	 * mov ebp, esp
	 * mov eax, gs:0
	 * mov eax, DWORD PTR [eax+<offset>]
	 */
	if (
		(code [0] == 0x55) && (code [1] == 0x89) && (code [2] == 0xe5) &&
		(code [3] == 0x65) && (code [4] == 0xa1) && (code [5] == 0x00) &&
		(code [6] == 0x00) && (code [7] == 0x00) && (code [8] == 0x00) &&
		(code [9] == 0x8b) && (code [10] == 0x80)) {
		return tls_read_disp32 (code + 11);
	}

	/* gcc-3.4
	 *
	 * push ebp
	 * mov ebp, esp
	 * mov eax, gs:<offset>
	 *
	 * Must be tested after the gcc-3.3.2 form, which starts with the same
	 * five bytes.
	 */
	if (
		(code [0] == 0x55) && (code [1] == 0x89) && (code [2] == 0xe5) &&
		(code [3] == 0x65) && (code [4] == 0xa1)) {
		return tls_read_disp32 (code + 5);
	}

	/* 3.2.2 with -march=athlon
	 *
	 * push ebp
	 * mov eax, gs:<offset>
	 * mov ebp, esp
	 */
	if (
		(code [0] == 0x55) && (code [1] == 0x65) && (code [2] == 0xa1)) {
		return tls_read_disp32 (code + 3);
	}

	return -1;
}
4185 void
4186 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4187 {
4188 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4189         pthread_t self = pthread_self();
4190         pthread_attr_t attr;
4191         void *staddr = NULL;
4192         size_t stsize = 0;
4193         struct sigaltstack sa;
4194 #endif
4195
4196         if (!tls_offset_inited) {
4197                 tls_offset_inited = TRUE;
4198                 if (getenv ("MONO_NPTL")) {
4199                         lmf_tls_offset = read_tls_offset_from_method (mono_get_lmf_addr);
4200                         appdomain_tls_offset = read_tls_offset_from_method (mono_domain_get);
4201                         thread_tls_offset = read_tls_offset_from_method (mono_thread_current);
4202                 }
4203         }               
4204
4205 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4206
4207         /* Determine stack boundaries */
4208         if (!mono_running_on_valgrind ()) {
4209 #ifdef HAVE_PTHREAD_GETATTR_NP
4210                 pthread_getattr_np( self, &attr );
4211 #else
4212 #ifdef HAVE_PTHREAD_ATTR_GET_NP
4213                 pthread_attr_get_np( self, &attr );
4214 #elif defined(sun)
4215                 pthread_attr_init( &attr );
4216                 pthread_attr_getstacksize( &attr, &stsize );
4217 #else
4218 #error "Not implemented"
4219 #endif
4220 #endif
4221 #ifndef sun
4222                 pthread_attr_getstack( &attr, &staddr, &stsize );
4223 #endif
4224         }
4225
4226         /* 
4227          * staddr seems to be wrong for the main thread, so we keep the value in
4228          * tls->end_of_stack
4229          */
4230         tls->stack_size = stsize;
4231
4232         /* Setup an alternate signal stack */
4233         tls->signal_stack = g_malloc (SIGNAL_STACK_SIZE);
4234         tls->signal_stack_size = SIGNAL_STACK_SIZE;
4235
4236         sa.ss_sp = tls->signal_stack;
4237         sa.ss_size = SIGNAL_STACK_SIZE;
4238         sa.ss_flags = SS_ONSTACK;
4239         sigaltstack (&sa, NULL);
4240 #endif
4241 }
4242
4243 void
4244 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4245 {
4246 #ifdef MONO_ARCH_SIGSEGV_ON_ALTSTACK
4247         struct sigaltstack sa;
4248
4249         sa.ss_sp = tls->signal_stack;
4250         sa.ss_size = SIGNAL_STACK_SIZE;
4251         sa.ss_flags = SS_DISABLE;
4252         sigaltstack  (&sa, NULL);
4253
4254         if (tls->signal_stack)
4255                 g_free (tls->signal_stack);
4256 #endif
4257 }
4258
4259 void
4260 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
4261 {
4262
4263         /* add the this argument */
4264         if (this_reg != -1) {
4265                 MonoInst *this;
4266                 MONO_INST_NEW (cfg, this, OP_OUTARG);
4267                 this->type = this_type;
4268                 this->sreg1 = this_reg;
4269                 mono_bblock_add_inst (cfg->cbb, this);
4270         }
4271
4272         if (vt_reg != -1) {
4273                 MonoInst *vtarg;
4274                 MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
4275                 vtarg->type = STACK_MP;
4276                 vtarg->sreg1 = vt_reg;
4277                 mono_bblock_add_inst (cfg->cbb, vtarg);
4278         }
4279 }
4280
4281
4282 MonoInst*
4283 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4284 {
4285         MonoInst *ins = NULL;
4286
4287         if (cmethod->klass == mono_defaults.math_class) {
4288                 if (strcmp (cmethod->name, "Sin") == 0) {
4289                         MONO_INST_NEW (cfg, ins, OP_SIN);
4290                         ins->inst_i0 = args [0];
4291                 } else if (strcmp (cmethod->name, "Cos") == 0) {
4292                         MONO_INST_NEW (cfg, ins, OP_COS);
4293                         ins->inst_i0 = args [0];
4294                 } else if (strcmp (cmethod->name, "Tan") == 0) {
4295                         MONO_INST_NEW (cfg, ins, OP_TAN);
4296                         ins->inst_i0 = args [0];
4297                 } else if (strcmp (cmethod->name, "Atan") == 0) {
4298                         MONO_INST_NEW (cfg, ins, OP_ATAN);
4299                         ins->inst_i0 = args [0];
4300                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
4301                         MONO_INST_NEW (cfg, ins, OP_SQRT);
4302                         ins->inst_i0 = args [0];
4303                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
4304                         MONO_INST_NEW (cfg, ins, OP_ABS);
4305                         ins->inst_i0 = args [0];
4306                 }
4307 #if 0
4308                 /* OP_FREM is not IEEE compatible */
4309                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
4310                         MONO_INST_NEW (cfg, ins, OP_FREM);
4311                         ins->inst_i0 = args [0];
4312                         ins->inst_i1 = args [1];
4313                 }
4314 #endif
4315         } else if(cmethod->klass->image == mono_defaults.corlib &&
4316                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4317                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4318
4319                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4320                         MonoInst *ins_iconst;
4321
4322                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4323                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4324                         ins_iconst->inst_c0 = 1;
4325
4326                         ins->inst_i0 = args [0];
4327                         ins->inst_i1 = ins_iconst;
4328                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4329                         MonoInst *ins_iconst;
4330
4331                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
4332                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
4333                         ins_iconst->inst_c0 = -1;
4334
4335                         ins->inst_i0 = args [0];
4336                         ins->inst_i1 = ins_iconst;
4337                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4338                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
4339
4340                         ins->inst_i0 = args [0];
4341                         ins->inst_i1 = args [1];
4342                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
4343                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
4344
4345                         ins->inst_i0 = args [0];
4346                         ins->inst_i1 = args [1];
4347                 }
4348         }
4349
4350         return ins;
4351 }
4352
4353
4354 gboolean
4355 mono_arch_print_tree (MonoInst *tree, int arity)
4356 {
4357         return 0;
4358 }
4359
4360 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4361 {
4362         MonoInst* ins;
4363         
4364         if (appdomain_tls_offset == -1)
4365                 return NULL;
4366         
4367         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4368         ins->inst_offset = appdomain_tls_offset;
4369         return ins;
4370 }
4371
4372 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4373 {
4374         MonoInst* ins;
4375         
4376         if (thread_tls_offset == -1)
4377                 return NULL;
4378         
4379         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4380         ins->inst_offset = thread_tls_offset;
4381         return ins;
4382 }
4383
4384 guint32
4385 mono_arch_get_patch_offset (guint8 *code)
4386 {
4387         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
4388                 return 2;
4389         else if ((code [0] == 0xba))
4390                 return 1;
4391         else if ((code [0] == 0x68))
4392                 /* push IMM */
4393                 return 1;
4394         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
4395                 /* push <OFFSET>(<REG>) */
4396                 return 2;
4397         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
4398                 /* call *<OFFSET>(<REG>) */
4399                 return 2;
4400         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
4401                 /* fldl <ADDR> */
4402                 return 2;
4403         else if ((code [0] == 0x58) && (code [1] == 0x05))
4404                 /* pop %eax; add <OFFSET>, %eax */
4405                 return 2;
4406         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
4407                 /* pop <REG>; add <OFFSET>, <REG> */
4408                 return 3;
4409         else {
4410                 g_assert_not_reached ();
4411                 return -1;
4412         }
4413 }