Fri Sep 30 19:10:29 CEST 2005 Paolo Molaro <lupus@ximian.com>
[mono.git] / mono / mini / mini-x86.c
1 /*
2  * mini-x86.c: x86 backend for the Mono code generator
3  *
4  * Authors:
5  *   Paolo Molaro (lupus@ximian.com)
6  *   Dietmar Maurer (dietmar@ximian.com)
7  *   Patrik Torstensson
8  *
9  * (C) 2003 Ximian, Inc.
10  */
11 #include "mini.h"
12 #include <string.h>
13 #include <math.h>
14
15 #include <mono/metadata/appdomain.h>
16 #include <mono/metadata/debug-helpers.h>
17 #include <mono/metadata/threads.h>
18 #include <mono/metadata/profiler-private.h>
19 #include <mono/utils/mono-math.h>
20
21 #include "trace.h"
22 #include "mini-x86.h"
23 #include "inssel.h"
24 #include "cpu-pentium.h"
25
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

/* Round VAL up to the next multiple of ALIGN (ALIGN must be a power of two) */
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

/* Offset from the frame pointer to the first incoming argument (saved EBP + return address) */
#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

#define NOT_IMPLEMENTED g_assert_not_reached ()
43
44 const char*
45 mono_arch_regname (int reg) {
46         switch (reg) {
47         case X86_EAX: return "%eax";
48         case X86_EBX: return "%ebx";
49         case X86_ECX: return "%ecx";
50         case X86_EDX: return "%edx";
51         case X86_ESP: return "%esp";    case X86_EBP: return "%ebp";
52         case X86_EDI: return "%edi";
53         case X86_ESI: return "%esi";
54         }
55         return "unknown";
56 }
57
/*
 * mono_arch_fregname:
 *
 *   The x86 backend does not name fp registers; always returns "unknown".
 */
const char*
mono_arch_fregname (int reg) {
	/* No named fp registers on this backend */
	return "unknown";
}
62
/* Where a single argument or return value is passed */
typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,	/* small struct returned in registers / fp stack (win32 pinvoke) */
	ArgOnFloatFpStack,
	ArgOnDoubleFpStack,
	ArgNone			/* void return */
} ArgStorage;

/* Location information for one argument or return value */
typedef struct {
	gint16 offset;		/* stack offset, valid when storage == ArgOnStack */
	gint8  reg;		/* register number, valid for register storage */
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

/* Complete call layout computed by get_call_info () */
typedef struct {
	int nargs;
	guint32 stack_usage;	/* total stack space taken by the arguments */
	guint32 reg_usage;	/* number of integer registers used */
	guint32 freg_usage;	/* number of fp registers used */
	gboolean need_stack_align;
	ArgInfo ret;
	ArgInfo sig_cookie;	/* location of the vararg signature cookie */
	ArgInfo args [1];	/* variable length: allocated with one entry per argument */
} CallInfo;
94
/* Number of integer registers used for argument passing: none on x86 */
#define PARAM_REGS 0

/* Number of fp registers used for argument passing: none on x86 */
#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#ifdef PLATFORM_WIN32
/* Registers used to return small structs from pinvoke calls on win32 */
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
104
105 static void inline
106 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
107 {
108     ainfo->offset = *stack_size;
109
110     if (*gr >= PARAM_REGS) {
111                 ainfo->storage = ArgOnStack;
112                 (*stack_size) += sizeof (gpointer);
113     }
114     else {
115                 ainfo->storage = ArgInIReg;
116                 ainfo->reg = param_regs [*gr];
117                 (*gr) ++;
118     }
119 }
120
121 static void inline
122 add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
123 {
124         ainfo->offset = *stack_size;
125
126         g_assert (PARAM_REGS == 0);
127         
128         ainfo->storage = ArgOnStack;
129         (*stack_size) += sizeof (gpointer) * 2;
130 }
131
132 static void inline
133 add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
134 {
135     ainfo->offset = *stack_size;
136
137     if (*gr >= FLOAT_PARAM_REGS) {
138                 ainfo->storage = ArgOnStack;
139                 (*stack_size) += is_double ? 8 : 4;
140     }
141     else {
142                 /* A double register */
143                 if (is_double)
144                         ainfo->storage = ArgInDoubleSSEReg;
145                 else
146                         ainfo->storage = ArgInFloatSSEReg;
147                 ainfo->reg = *gr;
148                 (*gr) += 1;
149     }
150 }
151
152
/*
 * add_valuetype:
 *
 *   Compute the storage of a valuetype argument or return value of type TYPE.
 * IS_RETURN distinguishes return values from arguments; GR, FR and STACK_SIZE
 * are updated in place.
 */
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	/* pinvoke signatures use the native (marshalled) size of the type */
	if (sig->pinvoke) 
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else 
		size = mono_type_stack_size (&klass->byval_arg, NULL);

#ifdef PLATFORM_WIN32
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * the exact rules are not very well documented, the code below seems to work with the 
		 * code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}		
		/* Structs of 1, 2, 4 or 8 bytes are returned in return_regs [] (EAX, and EDX for the high word) */
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	/* Everything else is passed by value on the stack, pointer aligned */
	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
208
209 /*
210  * get_call_info:
211  *
212  *  Obtain information about a call according to the calling convention.
213  * For x86 ELF, see the "System V Application Binary Interface Intel386 
214  * Architecture Processor Supplment, Fourth Edition" document for more
215  * information.
216  * For x86 win32, see ???.
217  */
218 static CallInfo*
219 get_call_info (MonoMethodSignature *sig, gboolean is_pinvoke)
220 {
221         guint32 i, gr, fr;
222         MonoType *ret_type;
223         int n = sig->hasthis + sig->param_count;
224         guint32 stack_size = 0;
225         CallInfo *cinfo;
226
227         cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
228
229         gr = 0;
230         fr = 0;
231
232         /* return value */
233         {
234                 ret_type = mono_type_get_underlying_type (sig->ret);
235                 switch (ret_type->type) {
236                 case MONO_TYPE_BOOLEAN:
237                 case MONO_TYPE_I1:
238                 case MONO_TYPE_U1:
239                 case MONO_TYPE_I2:
240                 case MONO_TYPE_U2:
241                 case MONO_TYPE_CHAR:
242                 case MONO_TYPE_I4:
243                 case MONO_TYPE_U4:
244                 case MONO_TYPE_I:
245                 case MONO_TYPE_U:
246                 case MONO_TYPE_PTR:
247                 case MONO_TYPE_FNPTR:
248                 case MONO_TYPE_CLASS:
249                 case MONO_TYPE_OBJECT:
250                 case MONO_TYPE_SZARRAY:
251                 case MONO_TYPE_ARRAY:
252                 case MONO_TYPE_STRING:
253                         cinfo->ret.storage = ArgInIReg;
254                         cinfo->ret.reg = X86_EAX;
255                         break;
256                 case MONO_TYPE_U8:
257                 case MONO_TYPE_I8:
258                         cinfo->ret.storage = ArgInIReg;
259                         cinfo->ret.reg = X86_EAX;
260                         break;
261                 case MONO_TYPE_R4:
262                         cinfo->ret.storage = ArgOnFloatFpStack;
263                         break;
264                 case MONO_TYPE_R8:
265                         cinfo->ret.storage = ArgOnDoubleFpStack;
266                         break;
267                 case MONO_TYPE_VALUETYPE: {
268                         guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
269
270                         add_valuetype (sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
271                         if (cinfo->ret.storage == ArgOnStack)
272                                 /* The caller passes the address where the value is stored */
273                                 add_general (&gr, &stack_size, &cinfo->ret);
274                         break;
275                 }
276                 case MONO_TYPE_TYPEDBYREF:
277                         /* Same as a valuetype with size 24 */
278                         add_general (&gr, &stack_size, &cinfo->ret);
279                         ;
280                         break;
281                 case MONO_TYPE_VOID:
282                         cinfo->ret.storage = ArgNone;
283                         break;
284                 default:
285                         g_error ("Can't handle as return value 0x%x", sig->ret->type);
286                 }
287         }
288
289         /* this */
290         if (sig->hasthis)
291                 add_general (&gr, &stack_size, cinfo->args + 0);
292
293         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
294                 gr = PARAM_REGS;
295                 fr = FLOAT_PARAM_REGS;
296                 
297                 /* Emit the signature cookie just before the implicit arguments */
298                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
299         }
300
301         for (i = 0; i < sig->param_count; ++i) {
302                 ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
303                 MonoType *ptype;
304
305                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
306                         /* We allways pass the sig cookie on the stack for simplicity */
307                         /* 
308                          * Prevent implicit arguments + the sig cookie from being passed 
309                          * in registers.
310                          */
311                         gr = PARAM_REGS;
312                         fr = FLOAT_PARAM_REGS;
313
314                         /* Emit the signature cookie just before the implicit arguments */
315                         add_general (&gr, &stack_size, &cinfo->sig_cookie);
316                 }
317
318                 if (sig->params [i]->byref) {
319                         add_general (&gr, &stack_size, ainfo);
320                         continue;
321                 }
322                 ptype = mono_type_get_underlying_type (sig->params [i]);
323                 switch (ptype->type) {
324                 case MONO_TYPE_BOOLEAN:
325                 case MONO_TYPE_I1:
326                 case MONO_TYPE_U1:
327                         add_general (&gr, &stack_size, ainfo);
328                         break;
329                 case MONO_TYPE_I2:
330                 case MONO_TYPE_U2:
331                 case MONO_TYPE_CHAR:
332                         add_general (&gr, &stack_size, ainfo);
333                         break;
334                 case MONO_TYPE_I4:
335                 case MONO_TYPE_U4:
336                         add_general (&gr, &stack_size, ainfo);
337                         break;
338                 case MONO_TYPE_I:
339                 case MONO_TYPE_U:
340                 case MONO_TYPE_PTR:
341                 case MONO_TYPE_FNPTR:
342                 case MONO_TYPE_CLASS:
343                 case MONO_TYPE_OBJECT:
344                 case MONO_TYPE_STRING:
345                 case MONO_TYPE_SZARRAY:
346                 case MONO_TYPE_ARRAY:
347                         add_general (&gr, &stack_size, ainfo);
348                         break;
349                 case MONO_TYPE_VALUETYPE:
350                         add_valuetype (sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
351                         break;
352                 case MONO_TYPE_TYPEDBYREF:
353                         stack_size += sizeof (MonoTypedRef);
354                         ainfo->storage = ArgOnStack;
355                         break;
356                 case MONO_TYPE_U8:
357                 case MONO_TYPE_I8:
358                         add_general_pair (&gr, &stack_size, ainfo);
359                         break;
360                 case MONO_TYPE_R4:
361                         add_float (&fr, &stack_size, ainfo, FALSE);
362                         break;
363                 case MONO_TYPE_R8:
364                         add_float (&fr, &stack_size, ainfo, TRUE);
365                         break;
366                 default:
367                         g_error ("unexpected type 0x%x", ptype->type);
368                         g_assert_not_reached ();
369                 }
370         }
371
372         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
373                 gr = PARAM_REGS;
374                 fr = FLOAT_PARAM_REGS;
375                 
376                 /* Emit the signature cookie just before the implicit arguments */
377                 add_general (&gr, &stack_size, &cinfo->sig_cookie);
378         }
379
380         cinfo->stack_usage = stack_size;
381         cinfo->reg_usage = gr;
382         cinfo->freg_usage = fr;
383         return cinfo;
384 }
385
/*
 * mono_arch_get_argument_info:
 * @csig:  a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries. 
 *
 * Returns the size of the activation frame.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, frame_size = 0;
	int size, align, pad;
	int offset = 8;		/* arguments start at ARGS_OFFSET above the frame pointer */
	CallInfo *cinfo;

	cinfo = get_call_info (csig, FALSE);

	/* An indirectly returned valuetype adds a hidden pointer argument */
	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		frame_size += sizeof (gpointer);
		offset += 4;
	}

	/* Entry 0 holds the combined size of the implicit arguments */
	arg_info [0].size = frame_size;

	for (k = 0; k < param_count; k++) {
		
		if (csig->pinvoke)
			size = mono_type_native_stack_size (csig->params [k], &align);
		else
			size = mono_type_stack_size (csig->params [k], &align);

		/* ignore alignment for now */
		align = 1;

		/* pad brings frame_size up to the next multiple of align */
		frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);	
		arg_info [k].pad = pad;
		frame_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	/* Round the frame up to MONO_ARCH_FRAME_ALIGNMENT; the last entry records the padding */
	align = MONO_ARCH_FRAME_ALIGNMENT;
	frame_size += pad = (align - (frame_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return frame_size;
}
449
/*
 * Pre-assembled x86 stub which executes the CPUID instruction. It takes the
 * function id and four output pointers as cdecl arguments. Kept as data so
 * it can be copied into executable memory at runtime (see cpuid () below).
 */
static const guchar cpuid_impl [] = {
	0x55,                		/* push   %ebp */
	0x89, 0xe5,                	/* mov    %esp,%ebp */
	0x53,                		/* push   %ebx */
	0x8b, 0x45, 0x08,             	/* mov    0x8(%ebp),%eax */
	0x0f, 0xa2,                	/* cpuid   */
	0x50,                		/* push   %eax */
	0x8b, 0x45, 0x10,             	/* mov    0x10(%ebp),%eax */
	0x89, 0x18,                	/* mov    %ebx,(%eax) */
	0x8b, 0x45, 0x14,             	/* mov    0x14(%ebp),%eax */
	0x89, 0x08,                	/* mov    %ecx,(%eax) */
	0x8b, 0x45, 0x18,             	/* mov    0x18(%ebp),%eax */
	0x89, 0x10,                	/* mov    %edx,(%eax) */
	0x58,                		/* pop    %eax */
	0x8b, 0x55, 0x0c,             	/* mov    0xc(%ebp),%edx */
	0x89, 0x02,                	/* mov    %eax,(%edx) */
	0x5b,                		/* pop    %ebx */
	0xc9,                		/* leave   */
	0xc3,                		/* ret     */
};

/* Signature of the cpuid_impl stub above */
typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
472
/*
 * cpuid:
 *
 *   Execute the CPUID instruction with function ID, storing the results in
 * *P_EAX..*P_EDX. Returns 1 on success, 0 when the cpu does not support
 * CPUID (detected by trying to toggle the ID bit, 0x200000, in EFLAGS).
 */
static int 
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	/* Toggle EFLAGS bit 21; if the change sticks, CPUID is available */
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	/* MSVC variant of the same EFLAGS bit 21 probe */
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		MonoCodeManager *codeman = mono_code_manager_new_dynamic ();
		CpuidFunc func;
		void *ptr = mono_code_manager_reserve (codeman, sizeof (cpuid_impl));
		/* Copy the pre-assembled stub into executable memory and call it */
		memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));

		func = (CpuidFunc)ptr;
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		mono_code_manager_destroy (codeman);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
532
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	/* Read the x87 control word, select 53-bit (double) precision, write it back */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	/* NOTE(review): the control word is read back once more here — presumably
	 * to make sure the fldcw has taken effect; confirm before removing */
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	_control87 (_PC_53, MCW_PC);
#endif
}
552
553 /*
554  * This function returns the optimizations supported on this cpu.
555  */
556 guint32
557 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
558 {
559         int eax, ebx, ecx, edx;
560         guint32 opts = 0;
561         
562         *exclude_mask = 0;
563         /* Feature Flags function, flags returned in EDX. */
564         if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
565                 if (edx & (1 << 15)) {
566                         opts |= MONO_OPT_CMOV;
567                         if (edx & 1)
568                                 opts |= MONO_OPT_FCMOV;
569                         else
570                                 *exclude_mask |= MONO_OPT_FCMOV;
571                 } else
572                         *exclude_mask |= MONO_OPT_CMOV;
573         }
574         return opts;
575 }
576
/*
 * Determine whenever the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	/* 0xf7 with modrm mod == 3 and reg field == 7 is "idiv <reg>" */
	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG: fetch the divisor's value from the faulting context */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			/* ESP/EBP — not expected as idiv operands here */
			g_assert_not_reached ();
			reg = -1;	/* not reached; keeps reg initialized */
		}

		/* A divisor of -1 means the fault was an overflow (e.g. INT_MIN / -1)
		 * rather than a division by zero */
		if (reg == -1)
			return TRUE;
	}
			
	return FALSE;
}
625
626 static gboolean
627 is_regsize_var (MonoType *t) {
628         if (t->byref)
629                 return TRUE;
630         switch (mono_type_get_underlying_type (t)->type) {
631         case MONO_TYPE_I4:
632         case MONO_TYPE_U4:
633         case MONO_TYPE_I:
634         case MONO_TYPE_U:
635         case MONO_TYPE_PTR:
636         case MONO_TYPE_FNPTR:
637                 return TRUE;
638         case MONO_TYPE_OBJECT:
639         case MONO_TYPE_STRING:
640         case MONO_TYPE_CLASS:
641         case MONO_TYPE_SZARRAY:
642         case MONO_TYPE_ARRAY:
643                 return TRUE;
644         case MONO_TYPE_VALUETYPE:
645                 return FALSE;
646         }
647         return FALSE;
648 }
649
/*
 * mono_arch_get_allocatable_int_vars:
 *
 *   Return the list of variables in CFG which may be allocated to integer
 * registers, sorted by mono_varlist_sort ().
 */
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		/* Skip dead/volatile/indirectly accessed vars and anything which is
		 * not a local or an argument */
		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we dont allocate I1 to registers because there is no simply way to sign extend 
		 * 8bit quantities in caller saved registers on x86 */
		if (is_regsize_var (ins->inst_vtype) || (ins->inst_vtype->type == MONO_TYPE_BOOLEAN) || 
		    (ins->inst_vtype->type == MONO_TYPE_U1) || (ins->inst_vtype->type == MONO_TYPE_U2)||
		    (ins->inst_vtype->type == MONO_TYPE_I2) || (ins->inst_vtype->type == MONO_TYPE_CHAR)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
683
684 GList *
685 mono_arch_get_global_int_regs (MonoCompile *cfg)
686 {
687         GList *regs = NULL;
688
689         /* we can use 3 registers for global allocation */
690         regs = g_list_prepend (regs, (gpointer)X86_EBX);
691         regs = g_list_prepend (regs, (gpointer)X86_ESI);
692         regs = g_list_prepend (regs, (gpointer)X86_EDI);
693
694         return regs;
695 }
696
697 /*
698  * mono_arch_regalloc_cost:
699  *
700  *  Return the cost, in number of memory references, of the action of 
701  * allocating the variable VMV into a register during global register
702  * allocation.
703  */
704 guint32
705 mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
706 {
707         MonoInst *ins = cfg->varinfo [vmv->idx];
708
709         if (cfg->method->save_lmf)
710                 /* The register is already saved */
711                 return (ins->opcode == OP_ARG) ? 1 : 0;
712         else
713                 /* push+pop+possible load if it is an argument */
714                 return (ins->opcode == OP_ARG) ? 3 : 2;
715 }
716  
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (sig, FALSE);

	cfg->frame_reg = MONO_ARCH_BASEREG;
	/* offset accumulates the frame space below the frame pointer as a positive number */
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		/* One word for each callee saved register actually used */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		/* Round up to the locals' required alignment (power of two) */
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			/* Locals live at negative offsets from the frame pointer */
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	g_free (offsets);
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		/* Indirectly returned valuetype: the hidden return pointer is an argument */
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		break;
	case ArgValuetypeInReg:
		/* Already handled above */
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	/* Incoming arguments keep their call-site stack slots above the frame pointer */
	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->varinfo [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	/* Round the whole frame up to MONO_ARCH_FRAME_ALIGNMENT */
	offset += (MONO_ARCH_FRAME_ALIGNMENT - 1);
	offset &= ~(MONO_ARCH_FRAME_ALIGNMENT - 1);

	cfg->stack_offset = offset;

	g_free (cinfo);
}
835
836 void
837 mono_arch_create_vars (MonoCompile *cfg)
838 {
839         MonoMethodSignature *sig;
840         CallInfo *cinfo;
841
842         sig = mono_method_signature (cfg->method);
843
844         cinfo = get_call_info (sig, FALSE);
845
846         if (cinfo->ret.storage == ArgValuetypeInReg)
847                 cfg->ret_var_is_local = TRUE;
848
849         g_free (cinfo);
850 }
851
852 /* Fixme: we need an alignment solution for enter_method and mono_arch_call_opcode,
853  * currently alignment in mono_arch_call_opcode is computed without arch_get_argument_info 
854  */
855
856 /* 
857  * take the arguments and generate the arch-specific
858  * instructions to properly call the function in call.
859  * This includes pushing, moving arguments to the right register
860  * etc.
861  */
862 MonoCallInst*
863 mono_arch_call_opcode (MonoCompile *cfg, MonoBasicBlock* bb, MonoCallInst *call, int is_virtual) {
864         MonoInst *arg, *in;
865         MonoMethodSignature *sig;
866         int i, n;
867         CallInfo *cinfo;
868         int sentinelpos;
869
870         sig = call->signature;
871         n = sig->param_count + sig->hasthis;
872
873         cinfo = get_call_info (sig, FALSE);
874
875         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
876                 sentinelpos = sig->sentinelpos + (is_virtual ? 1 : 0);
877
878         for (i = 0; i < n; ++i) {
879                 ArgInfo *ainfo = cinfo->args + i;
880
881                 /* Emit the signature cookie just before the implicit arguments */
882                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
883                         MonoMethodSignature *tmp_sig;
884                         MonoInst *sig_arg;
885
886                         /* FIXME: Add support for signature tokens to AOT */
887                         cfg->disable_aot = TRUE;
888                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
889
890                         /*
891                          * mono_ArgIterator_Setup assumes the signature cookie is 
892                          * passed first and all the arguments which were before it are
893                          * passed on the stack after the signature. So compensate by 
894                          * passing a different signature.
895                          */
896                         tmp_sig = mono_metadata_signature_dup (call->signature);
897                         tmp_sig->param_count -= call->signature->sentinelpos;
898                         tmp_sig->sentinelpos = 0;
899                         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
900
901                         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
902                         sig_arg->inst_p0 = tmp_sig;
903
904                         arg->inst_left = sig_arg;
905                         arg->type = STACK_PTR;
906                         /* prepend, so they get reversed */
907                         arg->next = call->out_args;
908                         call->out_args = arg;
909                 }
910
911                 if (is_virtual && i == 0) {
912                         /* the argument will be attached to the call instrucion */
913                         in = call->args [i];
914                 } else {
915                         MonoType *t;
916
917                         if (i >= sig->hasthis)
918                                 t = sig->params [i - sig->hasthis];
919                         else
920                                 t = &mono_defaults.int_class->byval_arg;
921                         t = mono_type_get_underlying_type (t);
922
923                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
924                         in = call->args [i];
925                         arg->cil_code = in->cil_code;
926                         arg->inst_left = in;
927                         arg->type = in->type;
928                         /* prepend, so they get reversed */
929                         arg->next = call->out_args;
930                         call->out_args = arg;
931
932                         if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
933                                 gint align;
934                                 guint32 size;
935
936                                 if (t->type == MONO_TYPE_TYPEDBYREF) {
937                                         size = sizeof (MonoTypedRef);
938                                         align = sizeof (gpointer);
939                                 }
940                                 else
941                                         if (sig->pinvoke)
942                                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
943                                         else
944                                                 size = mono_type_stack_size (&in->klass->byval_arg, &align);
945                                 arg->opcode = OP_OUTARG_VT;
946                                 arg->klass = in->klass;
947                                 arg->unused = sig->pinvoke;
948                                 arg->inst_imm = size; 
949                         }
950                         else {
951                                 switch (ainfo->storage) {
952                                 case ArgOnStack:
953                                         arg->opcode = OP_OUTARG;
954                                         if (!t->byref) {
955                                                 if (t->type == MONO_TYPE_R4)
956                                                         arg->opcode = OP_OUTARG_R4;
957                                                 else
958                                                         if (t->type == MONO_TYPE_R8)
959                                                                 arg->opcode = OP_OUTARG_R8;
960                                         }
961                                         break;
962                                 default:
963                                         g_assert_not_reached ();
964                                 }
965                         }
966                 }
967         }
968
969         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
970                 if (cinfo->ret.storage == ArgValuetypeInReg) {
971                         MonoInst *zero_inst;
972                         /*
973                          * After the call, the struct is in registers, but needs to be saved to the memory pointed
974                          * to by vt_arg in this_vret_args. This means that vt_arg needs to be saved somewhere
975                          * before calling the function. So we add a dummy instruction to represent pushing the 
976                          * struct return address to the stack. The return address will be saved to this stack slot 
977                          * by the code emitted in this_vret_args.
978                          */
979                         MONO_INST_NEW (cfg, arg, OP_OUTARG);
980                         MONO_INST_NEW (cfg, zero_inst, OP_ICONST);
981                         zero_inst->inst_p0 = 0;
982                         arg->inst_left = zero_inst;
983                         arg->type = STACK_PTR;
984                         /* prepend, so they get reversed */
985                         arg->next = call->out_args;
986                         call->out_args = arg;
987                 }
988                 else
989                         /* if the function returns a struct, the called method already does a ret $0x4 */
990                         if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret))
991                                 cinfo->stack_usage -= 4;
992         }
993
994         call->stack_usage = cinfo->stack_usage;
995         g_free (cinfo);
996
997         return call;
998 }
999
1000 /*
1001  * Allow tracing to work with this interface (with an optional argument)
1002  */
1003 void*
1004 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1005 {
1006         guchar *code = p;
1007
1008         /* if some args are passed in registers, we need to save them here */
1009         x86_push_reg (code, X86_EBP);
1010
1011         if (cfg->compile_aot) {
1012                 x86_push_imm (code, cfg->method);
1013                 x86_mov_reg_imm (code, X86_EAX, func);
1014                 x86_call_reg (code, X86_EAX);
1015         } else {
1016                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
1017                 x86_push_imm (code, cfg->method);
1018                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1019                 x86_call_code (code, 0);
1020         }
1021         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1022
1023         return code;
1024 }
1025
/* How the method's return value must be preserved around the epilog
 * trace call emitted by mono_arch_instrument_epilog () below. */
enum {
        SAVE_NONE,      /* nothing to preserve */
        SAVE_STRUCT,    /* valuetype return; address at 8(%ebp) is passed to the tracer */
        SAVE_EAX,       /* 32 bit integer/pointer result in %eax */
        SAVE_EAX_EDX,   /* 64 bit result in %eax:%edx */
        SAVE_FP         /* floating point result on the x87 stack */
};
1033
1034 void*
1035 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
1036 {
1037         guchar *code = p;
1038         int arg_size = 0, save_mode = SAVE_NONE;
1039         MonoMethod *method = cfg->method;
1040         
1041         switch (mono_type_get_underlying_type (mono_method_signature (method)->ret)->type) {
1042         case MONO_TYPE_VOID:
1043                 /* special case string .ctor icall */
1044                 if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class)
1045                         save_mode = SAVE_EAX;
1046                 else
1047                         save_mode = SAVE_NONE;
1048                 break;
1049         case MONO_TYPE_I8:
1050         case MONO_TYPE_U8:
1051                 save_mode = SAVE_EAX_EDX;
1052                 break;
1053         case MONO_TYPE_R4:
1054         case MONO_TYPE_R8:
1055                 save_mode = SAVE_FP;
1056                 break;
1057         case MONO_TYPE_VALUETYPE:
1058                 save_mode = SAVE_STRUCT;
1059                 break;
1060         default:
1061                 save_mode = SAVE_EAX;
1062                 break;
1063         }
1064
1065         switch (save_mode) {
1066         case SAVE_EAX_EDX:
1067                 x86_push_reg (code, X86_EDX);
1068                 x86_push_reg (code, X86_EAX);
1069                 if (enable_arguments) {
1070                         x86_push_reg (code, X86_EDX);
1071                         x86_push_reg (code, X86_EAX);
1072                         arg_size = 8;
1073                 }
1074                 break;
1075         case SAVE_EAX:
1076                 x86_push_reg (code, X86_EAX);
1077                 if (enable_arguments) {
1078                         x86_push_reg (code, X86_EAX);
1079                         arg_size = 4;
1080                 }
1081                 break;
1082         case SAVE_FP:
1083                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1084                 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1085                 if (enable_arguments) {
1086                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
1087                         x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
1088                         arg_size = 8;
1089                 }
1090                 break;
1091         case SAVE_STRUCT:
1092                 if (enable_arguments) {
1093                         x86_push_membase (code, X86_EBP, 8);
1094                         arg_size = 4;
1095                 }
1096                 break;
1097         case SAVE_NONE:
1098         default:
1099                 break;
1100         }
1101
1102         if (cfg->compile_aot) {
1103                 x86_push_imm (code, method);
1104                 x86_mov_reg_imm (code, X86_EAX, func);
1105                 x86_call_reg (code, X86_EAX);
1106         } else {
1107                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
1108                 x86_push_imm (code, method);
1109                 mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
1110                 x86_call_code (code, 0);
1111         }
1112         x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
1113
1114         switch (save_mode) {
1115         case SAVE_EAX_EDX:
1116                 x86_pop_reg (code, X86_EAX);
1117                 x86_pop_reg (code, X86_EDX);
1118                 break;
1119         case SAVE_EAX:
1120                 x86_pop_reg (code, X86_EAX);
1121                 break;
1122         case SAVE_FP:
1123                 x86_fld_membase (code, X86_ESP, 0, TRUE);
1124                 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
1125                 break;
1126         case SAVE_NONE:
1127         default:
1128                 break;
1129         }
1130
1131         return code;
1132 }
1133
/*
 * EMIT_COND_BRANCH:
 *
 *   Emit a conditional branch on condition COND (SIGN selects signed vs
 * unsigned condition codes).  If the target label/basic block has already
 * been emitted, branch to it directly; otherwise record patch info and emit
 * a forward branch to be patched later, using an 8 bit displacement when
 * branch optimization is on and the estimated offset fits in a byte.
 */
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
        if (ins->inst_i0->inst_c0) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
} else { \
        if (ins->inst_true_bb->native_offset) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
        } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
                if ((cfg->opt & MONO_OPT_BRANCH) && \
                    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
                        x86_branch8 (code, cond, 0, sign); \
                else \
                        x86_branch32 (code, cond, 0, sign); \
        } \
}
1158
/* Emit a branch to the (to be patched in) EXC_NAME exception throwing code,
 * taken when condition COND fails.  Wrapped in do { } while (0) without a
 * trailing semicolon, so the macro behaves like a single statement (the
 * previous trailing ';' defeated the idiom and broke unbraced if/else use). */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name)            \
        do {                                                        \
                mono_add_patch_info (cfg, code - cfg->native_code,   \
                                    MONO_PATCH_INFO_EXC, exc_name);  \
                x86_branch32 (code, cond, 0, signed);               \
        } while (0)
1166
/* Compare the two topmost x87 registers (popping both) and copy the FPU
 * status word into AX for the following flag tests.  The trailing ';' after
 * while (0) has been dropped so the do/while single-statement idiom works. */
#define EMIT_FPCOMPARE(code) do { \
        x86_fcompp (code); \
        x86_fnstsw (code); \
} while (0)
1171
1172
1173 static guint8*
1174 emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
1175 {
1176         if (cfg->compile_aot) {
1177                 guint32 got_reg = X86_EAX;
1178
1179                 if (cfg->compile_aot) {          
1180                         /*
1181                          * Since the patches are generated by the back end, there is
1182                          * no way to generate a got_var at this point.
1183                          */
1184                         g_assert (cfg->got_var);
1185
1186                         if (cfg->got_var->opcode == OP_REGOFFSET)
1187                                 x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
1188                         else
1189                                 got_reg = cfg->got_var->dreg;
1190                 }
1191
1192                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1193                 x86_call_membase (code, got_reg, 0xf0f0f0f0);
1194         }
1195         else {
1196                 mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
1197                 x86_call_code (code, 0);
1198         }
1199
1200         return code;
1201 }
1202
/* FIXME: Add more instructions */
/* TRUE if INS does not read the x86 condition flags, so a preceding
 * instruction may be rewritten into a flag-clobbering form (e.g. XOR
 * to zero a register) without changing observable behavior. */
#define INST_IGNORES_CFLAGS(ins) (((ins)->opcode == CEE_BR) || ((ins)->opcode == OP_STORE_MEMBASE_IMM) || ((ins)->opcode == OP_STOREI4_MEMBASE_REG))
1205
/*
 * peephole_pass:
 *
 *   Run simple pattern based optimizations over the linked instruction list
 * of BB: zeroing via xor, removal of multiplications by 1, compare->test
 * rewrites, store+load / load+load forwarding, and redundant move removal.
 * Matched instructions are rewritten in place or unlinked from the list.
 *
 * NOTE(review): some removal paths dereference last_ins without a NULL
 * check (e.g. the OP_MUL_IMM case below); this is only safe if such an
 * instruction can never be the first one of a block - verify.
 */
static void
peephole_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
        MonoInst *ins, *last_ins = NULL;
        ins = bb->code;

        while (ins) {

                switch (ins->opcode) {
                case OP_ICONST:
                        /* reg = 0 -> XOR (reg, reg) */
                        /* XOR sets cflags on x86, so we cant do it always */
                        if (ins->inst_c0 == 0 && ins->next && INST_IGNORES_CFLAGS (ins->next)) {
                                ins->opcode = CEE_XOR;
                                ins->sreg1 = ins->dreg;
                                ins->sreg2 = ins->dreg;
                        }
                        break;
                case OP_MUL_IMM: 
                        /* remove unnecessary multiplication with 1 */
                        if (ins->inst_imm == 1) {
                                if (ins->dreg != ins->sreg1) {
                                        ins->opcode = OP_MOVE;
                                } else {
                                        /* NOTE(review): unguarded last_ins deref, see header */
                                        last_ins->next = ins->next;
                                        ins = ins->next;
                                        continue;
                                }
                        }
                        break;
                case OP_COMPARE_IMM:
                        /* OP_COMPARE_IMM (reg, 0) 
                         * --> 
                         * OP_X86_TEST_NULL (reg) 
                         */
                        if (!ins->inst_imm)
                                ins->opcode = OP_X86_TEST_NULL;
                        break;
                case OP_X86_COMPARE_MEMBASE_IMM:
                        /* 
                         * OP_STORE_MEMBASE_REG reg, offset(basereg)
                         * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
                         * -->
                         * OP_STORE_MEMBASE_REG reg, offset(basereg)
                         * OP_COMPARE_IMM reg, imm
                         *
                         * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
                         */
                        if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                        ins->opcode = OP_COMPARE_IMM;
                                        ins->sreg1 = last_ins->sreg1;

                                        /* check if we can remove cmp reg,0 with test null */
                                        if (!ins->inst_imm)
                                                ins->opcode = OP_X86_TEST_NULL;
                                }

                        break;
                case OP_LOAD_MEMBASE:
                case OP_LOADI4_MEMBASE:
                        /* 
                         * Note: if reg1 = reg2 the load op is removed
                         *
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
                         * OP_LOAD_MEMBASE offset(basereg), reg2
                         * -->
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg)
                         * OP_MOVE reg1, reg2
                         */
                        if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG 
                                         || last_ins->opcode == OP_STORE_MEMBASE_REG) &&
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                if (ins->dreg == last_ins->sreg1) {
                                        last_ins->next = ins->next;                             
                                        ins = ins->next;                                
                                        continue;
                                } else {
                                        //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
                                        ins->opcode = OP_MOVE;
                                        ins->sreg1 = last_ins->sreg1;
                                }

                        /* 
                         * Note: reg1 must be different from the basereg in the second load
                         * Note: if reg1 = reg2 is equal then second load is removed
                         *
                         * OP_LOAD_MEMBASE offset(basereg), reg1
                         * OP_LOAD_MEMBASE offset(basereg), reg2
                         * -->
                         * OP_LOAD_MEMBASE offset(basereg), reg1
                         * OP_MOVE reg1, reg2
                         *
                         * NOTE(review): the '} if' below (rather than '} else if') is
                         * harmless because the two guards require different last_ins
                         * opcodes, but 'else if' was probably intended.
                         */
                        } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE
                                           || last_ins->opcode == OP_LOAD_MEMBASE) &&
                              ins->inst_basereg != last_ins->dreg &&
                              ins->inst_basereg == last_ins->inst_basereg &&
                              ins->inst_offset == last_ins->inst_offset) {

                                if (ins->dreg == last_ins->dreg) {
                                        last_ins->next = ins->next;                             
                                        ins = ins->next;                                
                                        continue;
                                } else {
                                        ins->opcode = OP_MOVE;
                                        ins->sreg1 = last_ins->dreg;
                                }

                                //g_assert_not_reached ();

#if 0
                        /* 
                         * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
                         * OP_LOAD_MEMBASE offset(basereg), reg
                         * -->
                         * OP_STORE_MEMBASE_IMM imm, offset(basereg) 
                         * OP_ICONST reg, imm
                         */
                        } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM
                                                || last_ins->opcode == OP_STORE_MEMBASE_IMM) &&
                                   ins->inst_basereg == last_ins->inst_destbasereg &&
                                   ins->inst_offset == last_ins->inst_offset) {
                                //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++);
                                ins->opcode = OP_ICONST;
                                ins->inst_c0 = last_ins->inst_imm;
                                g_assert_not_reached (); // check this rule
#endif
                        }
                        break;
                case OP_LOADU1_MEMBASE:
                case OP_LOADI1_MEMBASE:
                        /* 
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
                         * OP_LOAD_MEMBASE offset(basereg), reg2
                         * -->
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg)
                         * CONV_I2/U2 reg1, reg2
                         */
                        if (last_ins && X86_IS_BYTE_REG (last_ins->sreg1) &&
                                (last_ins->opcode == OP_STOREI1_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
                                ins->opcode = (ins->opcode == OP_LOADI1_MEMBASE) ? CEE_CONV_I1 : CEE_CONV_U1;
                                ins->sreg1 = last_ins->sreg1;
                        }
                        break;
                case OP_LOADU2_MEMBASE:
                case OP_LOADI2_MEMBASE:
                        /* 
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg) 
                         * OP_LOAD_MEMBASE offset(basereg), reg2
                         * -->
                         * OP_STORE_MEMBASE_REG reg1, offset(basereg)
                         * CONV_I2/U2 reg1, reg2
                         */
                        if (last_ins && (last_ins->opcode == OP_STOREI2_MEMBASE_REG) &&
                                        ins->inst_basereg == last_ins->inst_destbasereg &&
                                        ins->inst_offset == last_ins->inst_offset) {
                                ins->opcode = (ins->opcode == OP_LOADI2_MEMBASE) ? CEE_CONV_I2 : CEE_CONV_U2;
                                ins->sreg1 = last_ins->sreg1;
                        }
                        break;
                case CEE_CONV_I4:
                case CEE_CONV_U4:
                case OP_MOVE:
                        /*
                         * Removes:
                         *
                         * OP_MOVE reg, reg 
                         */
                        if (ins->dreg == ins->sreg1) {
                                if (last_ins)
                                        last_ins->next = ins->next;                             
                                ins = ins->next;
                                continue;
                        }
                        /* 
                         * Removes:
                         *
                         * OP_MOVE sreg, dreg 
                         * OP_MOVE dreg, sreg
                         */
                        if (last_ins && last_ins->opcode == OP_MOVE &&
                            ins->sreg1 == last_ins->dreg &&
                            ins->dreg == last_ins->sreg1) {
                                last_ins->next = ins->next;                             
                                ins = ins->next;                                
                                continue;
                        }
                        break;
                        
                case OP_X86_PUSH_MEMBASE:
                        if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
                                         last_ins->opcode == OP_STORE_MEMBASE_REG) &&
                            ins->inst_basereg == last_ins->inst_destbasereg &&
                            ins->inst_offset == last_ins->inst_offset) {
                                    ins->opcode = OP_X86_PUSH;
                                    ins->sreg1 = last_ins->sreg1;
                        }
                        break;
                }
                last_ins = ins;
                ins = ins->next;
        }
        bb->last_ins = last_ins;
}
1414
/* Compare-kind -> x86 condition code table used when lowering conditional
 * branches.  NOTE(review): the exact index mapping is defined by the opcode
 * layout in the mini engine headers — verify there before editing. */
static const int 
branch_cc_table [] = {
        X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
        X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
        X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
1421
/* Per-opcode instruction descriptions (from cpu-pentium.h, generated from
 * cpu-pentium.md); presumably consumed by the register allocator. */
static const char*const * ins_spec = pentium_desc;
1423
1424 /*#include "cprop.c"*/
/* Perform local (per basic block) register allocation for BB by delegating
 * to the architecture independent allocator in the mini engine. */
void
mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
{
        mono_local_regalloc (cfg, bb);
}
1430
/*
 * emit_float_to_int:
 *
 *   Emit code converting the value on top of the x87 register stack into a
 * SIZE byte integer placed in DREG.  The FPU control word is saved on the
 * stack, its rounding control bits are forced to truncation (0xc00 = RC 11b,
 * round toward zero, as C casts require), the conversion is done with fistp,
 * and the original control word is restored afterwards.
 */
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
        /* save the current control word and build a truncating copy at esp+2 */
        x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
        x86_fnstcw_membase(code, X86_ESP, 0);
        x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
        x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
        x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
        x86_fldcw_membase (code, X86_ESP, 2);
        if (size == 8) {
                x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
                x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
                x86_pop_reg (code, dreg);
                /* FIXME: need the high register 
                 * x86_pop_reg (code, dreg_high);
                 * NOTE(review): without this second pop, ESP stays 4 bytes low,
                 * so the fldcw/add below address the wrong slots — confirm the
                 * 8 byte path is unused before relying on it.
                 */
        } else {
                x86_push_reg (code, X86_EAX); // SP = SP - 4
                x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
                x86_pop_reg (code, dreg);
        }
        /* restore the saved control word and release its stack slot */
        x86_fldcw_membase (code, X86_ESP, 0);
        x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

        /* sign/zero extend sub-word results to a full 32 bit register */
        if (size == 1)
                x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
        else if (size == 2)
                x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
        return code;
}
1461
1462 static unsigned char*
1463 mono_emit_stack_alloc (guchar *code, MonoInst* tree)
1464 {
1465         int sreg = tree->sreg1;
1466         int need_touch = FALSE;
1467
1468 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
1469         need_touch = TRUE;
1470 #endif
1471
1472         if (need_touch) {
1473                 guint8* br[5];
1474
1475                 /*
1476                  * Under Windows:
1477                  * If requested stack size is larger than one page,
1478                  * perform stack-touch operation
1479                  */
1480                 /*
1481                  * Generate stack probe code.
1482                  * Under Windows, it is necessary to allocate one page at a time,
1483                  * "touching" stack after each successful sub-allocation. This is
1484                  * because of the way stack growth is implemented - there is a
1485                  * guard page before the lowest stack page that is currently commited.
1486                  * Stack normally grows sequentially so OS traps access to the
1487                  * guard page and commits more pages when needed.
1488                  */
1489                 x86_test_reg_imm (code, sreg, ~0xFFF);
1490                 br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1491
1492                 br[2] = code; /* loop */
1493                 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
1494                 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
1495
1496                 /* 
1497                  * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
1498                  * that follows only initializes the last part of the area.
1499                  */
1500                 /* Same as the init code below with size==0x1000 */
1501                 if (tree->flags & MONO_INST_INIT) {
1502                         x86_push_reg (code, X86_EAX);
1503                         x86_push_reg (code, X86_ECX);
1504                         x86_push_reg (code, X86_EDI);
1505                         x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
1506                         x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);                              
1507                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
1508                         x86_cld (code);
1509                         x86_prefix (code, X86_REP_PREFIX);
1510                         x86_stosl (code);
1511                         x86_pop_reg (code, X86_EDI);
1512                         x86_pop_reg (code, X86_ECX);
1513                         x86_pop_reg (code, X86_EAX);
1514                 }
1515
1516                 x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
1517                 x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
1518                 br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
1519                 x86_patch (br[3], br[2]);
1520                 x86_test_reg_reg (code, sreg, sreg);
1521                 br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
1522                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1523
1524                 br[1] = code; x86_jump8 (code, 0);
1525
1526                 x86_patch (br[0], code);
1527                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
1528                 x86_patch (br[1], code);
1529                 x86_patch (br[4], code);
1530         }
1531         else
1532                 x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
1533
1534         if (tree->flags & MONO_INST_INIT) {
1535                 int offset = 0;
1536                 if (tree->dreg != X86_EAX && sreg != X86_EAX) {
1537                         x86_push_reg (code, X86_EAX);
1538                         offset += 4;
1539                 }
1540                 if (tree->dreg != X86_ECX && sreg != X86_ECX) {
1541                         x86_push_reg (code, X86_ECX);
1542                         offset += 4;
1543                 }
1544                 if (tree->dreg != X86_EDI && sreg != X86_EDI) {
1545                         x86_push_reg (code, X86_EDI);
1546                         offset += 4;
1547                 }
1548                 
1549                 x86_shift_reg_imm (code, X86_SHR, sreg, 2);
1550                 if (sreg != X86_ECX)
1551                         x86_mov_reg_reg (code, X86_ECX, sreg, 4);
1552                 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
1553                                 
1554                 x86_lea_membase (code, X86_EDI, X86_ESP, offset);
1555                 x86_cld (code);
1556                 x86_prefix (code, X86_REP_PREFIX);
1557                 x86_stosl (code);
1558                 
1559                 if (tree->dreg != X86_EDI && sreg != X86_EDI)
1560                         x86_pop_reg (code, X86_EDI);
1561                 if (tree->dreg != X86_ECX && sreg != X86_ECX)
1562                         x86_pop_reg (code, X86_ECX);
1563                 if (tree->dreg != X86_EAX && sreg != X86_EAX)
1564                         x86_pop_reg (code, X86_EAX);
1565         }
1566         return code;
1567 }
1568
1569
1570 static guint8*
1571 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
1572 {
1573         CallInfo *cinfo;
1574         int quad;
1575
1576         /* Move return value to the target register */
1577         switch (ins->opcode) {
1578         case CEE_CALL:
1579         case OP_CALL_REG:
1580         case OP_CALL_MEMBASE:
1581                 if (ins->dreg != X86_EAX)
1582                         x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
1583                 break;
1584         case OP_VCALL:
1585         case OP_VCALL_REG:
1586         case OP_VCALL_MEMBASE:
1587                 cinfo = get_call_info (((MonoCallInst*)ins)->signature, FALSE);
1588                 if (cinfo->ret.storage == ArgValuetypeInReg) {
1589                         /* Pop the destination address from the stack */
1590                         x86_pop_reg (code, X86_ECX);
1591                         
1592                         for (quad = 0; quad < 2; quad ++) {
1593                                 switch (cinfo->ret.pair_storage [quad]) {
1594                                 case ArgInIReg:
1595                                         g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
1596                                         x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
1597                                         break;
1598                                 case ArgNone:
1599                                         break;
1600                                 default:
1601                                         g_assert_not_reached ();
1602                                 }
1603                         }
1604                 }
1605                 g_free (cinfo);
1606         default:
1607                 break;
1608         }
1609
1610         return code;
1611 }
1612
/*
 * emit_tls_get:
 *
 *   Emit code loading the thread-local value at TLS_OFFSET into DREG.
 * Returns the updated native code pointer.
 *
 * On Windows, tls_offset is a key obtained from TlsAlloc () and the emitted
 * sequence open-codes TlsGetValue ().  On other platforms, tls_offset is a
 * %gs-relative offset and the value is read directly.
 */
static guint8*
emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/* 
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems 
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	/* Only the first 64 TLS slots live inline in the TEB; this sequence
	 * does not handle the TlsExpansionSlots path for larger keys. */
	g_assert (tls_offset < 64);
	x86_prefix (code, X86_FS_PREFIX);
	/* NOTE(review): fs:[0x18] is presumably the TEB self-pointer -- confirm
	 * against the TEB layout for the targeted Windows versions */
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	/* NOTE(review): AND dword [teb+0x34], 0 -- looks like it clears the
	 * thread's last-error value, mirroring TlsGetValue (); verify */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	/* 3600 == 0xE10: offset of the inline TLS slot array in the TEB,
	 * per the article referenced above */
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	/* %gs-relative load: mov dreg, gs:[tls_offset] */
	x86_prefix (code, X86_GS_PREFIX);
	x86_mov_reg_mem (code, dreg, tls_offset, 4);			
#endif
	return code;
}
1633
/*
 * REAL_PRINT_REG(text, reg):
 *
 *   Debug helper that EMITS native code which, when executed, prints
 * "<text> <reg-number> <reg-value>" via printf.  The caller-saved registers
 * EAX/EDX/ECX are pushed and popped around the call; the three printf
 * arguments (format string, reg number, reg value) are cleaned up with the
 * ADD ESP, 12 -- so the emitted code clobbers the x86 flags.
 *
 * NOTE(review): deliberately NOT wrapped in do { ... } while (0); the macro
 * body ends with its own semicolon, so existing call sites may invoke it
 * without one -- confirm before converting.
 */
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
1648
1649 /* benchmark and set based on cpu */
1650 #define LOOP_ALIGNMENT 8
1651 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
1652
1653 void
1654 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
1655 {
1656         MonoInst *ins;
1657         MonoCallInst *call;
1658         guint offset;
1659         guint8 *code = cfg->native_code + cfg->code_len;
1660         MonoInst *last_ins = NULL;
1661         guint last_offset = 0;
1662         int max_len, cpos;
1663
1664         if (cfg->opt & MONO_OPT_PEEPHOLE)
1665                 peephole_pass (cfg, bb);
1666
1667         if (cfg->opt & MONO_OPT_LOOP) {
1668                 int pad, align = LOOP_ALIGNMENT;
1669                 /* set alignment depending on cpu */
1670                 if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
1671                         pad = align - pad;
1672                         /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
1673                         x86_padding (code, pad);
1674                         cfg->code_len += pad;
1675                         bb->native_offset = cfg->code_len;
1676                 }
1677         }
1678
1679         if (cfg->verbose_level > 2)
1680                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
1681
1682         cpos = bb->max_offset;
1683
1684         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
1685                 MonoProfileCoverageInfo *cov = cfg->coverage_info;
1686                 g_assert (!cfg->compile_aot);
1687                 cpos += 6;
1688
1689                 cov->data [bb->dfn].cil_code = bb->cil_code;
1690                 /* this is not thread safe, but good enough */
1691                 x86_inc_mem (code, &cov->data [bb->dfn].count); 
1692         }
1693
1694         offset = code - cfg->native_code;
1695
1696         ins = bb->code;
1697         while (ins) {
1698                 offset = code - cfg->native_code;
1699
1700                 max_len = ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
1701
1702                 if (offset > (cfg->code_size - max_len - 16)) {
1703                         cfg->code_size *= 2;
1704                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
1705                         code = cfg->native_code + offset;
1706                         mono_jit_stats.code_reallocs++;
1707                 }
1708
1709                 mono_debug_record_line_number (cfg, ins, offset);
1710
1711                 switch (ins->opcode) {
1712                 case OP_BIGMUL:
1713                         x86_mul_reg (code, ins->sreg2, TRUE);
1714                         break;
1715                 case OP_BIGMUL_UN:
1716                         x86_mul_reg (code, ins->sreg2, FALSE);
1717                         break;
1718                 case OP_X86_SETEQ_MEMBASE:
1719                 case OP_X86_SETNE_MEMBASE:
1720                         x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
1721                                          ins->inst_basereg, ins->inst_offset, TRUE);
1722                         break;
1723                 case OP_STOREI1_MEMBASE_IMM:
1724                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
1725                         break;
1726                 case OP_STOREI2_MEMBASE_IMM:
1727                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
1728                         break;
1729                 case OP_STORE_MEMBASE_IMM:
1730                 case OP_STOREI4_MEMBASE_IMM:
1731                         x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
1732                         break;
1733                 case OP_STOREI1_MEMBASE_REG:
1734                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
1735                         break;
1736                 case OP_STOREI2_MEMBASE_REG:
1737                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
1738                         break;
1739                 case OP_STORE_MEMBASE_REG:
1740                 case OP_STOREI4_MEMBASE_REG:
1741                         x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
1742                         break;
1743                 case CEE_LDIND_I:
1744                 case CEE_LDIND_I4:
1745                 case CEE_LDIND_U4:
1746                         x86_mov_reg_mem (code, ins->dreg, ins->inst_p0, 4);
1747                         break;
1748                 case OP_LOADU4_MEM:
1749                         x86_mov_reg_imm (code, ins->dreg, ins->inst_p0);
1750                         x86_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
1751                         break;
1752                 case OP_LOAD_MEMBASE:
1753                 case OP_LOADI4_MEMBASE:
1754                 case OP_LOADU4_MEMBASE:
1755                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
1756                         break;
1757                 case OP_LOADU1_MEMBASE:
1758                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
1759                         break;
1760                 case OP_LOADI1_MEMBASE:
1761                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
1762                         break;
1763                 case OP_LOADU2_MEMBASE:
1764                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
1765                         break;
1766                 case OP_LOADI2_MEMBASE:
1767                         x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
1768                         break;
1769                 case CEE_CONV_I1:
1770                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
1771                         break;
1772                 case CEE_CONV_I2:
1773                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
1774                         break;
1775                 case CEE_CONV_U1:
1776                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
1777                         break;
1778                 case CEE_CONV_U2:
1779                         x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
1780                         break;
1781                 case OP_COMPARE:
1782                         x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
1783                         break;
1784                 case OP_COMPARE_IMM:
1785                         x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
1786                         break;
1787                 case OP_X86_COMPARE_MEMBASE_REG:
1788                         x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
1789                         break;
1790                 case OP_X86_COMPARE_MEMBASE_IMM:
1791                         x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1792                         break;
1793                 case OP_X86_COMPARE_MEMBASE8_IMM:
1794                         x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1795                         break;
1796                 case OP_X86_COMPARE_REG_MEMBASE:
1797                         x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
1798                         break;
1799                 case OP_X86_COMPARE_MEM_IMM:
1800                         x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
1801                         break;
1802                 case OP_X86_TEST_NULL:
1803                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
1804                         break;
1805                 case OP_X86_ADD_MEMBASE_IMM:
1806                         x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1807                         break;
1808                 case OP_X86_ADD_MEMBASE:
1809                         x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
1810                         break;
1811                 case OP_X86_SUB_MEMBASE_IMM:
1812                         x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
1813                         break;
1814                 case OP_X86_SUB_MEMBASE:
1815                         x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
1816                         break;
1817                 case OP_X86_INC_MEMBASE:
1818                         x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
1819                         break;
1820                 case OP_X86_INC_REG:
1821                         x86_inc_reg (code, ins->dreg);
1822                         break;
1823                 case OP_X86_DEC_MEMBASE:
1824                         x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
1825                         break;
1826                 case OP_X86_DEC_REG:
1827                         x86_dec_reg (code, ins->dreg);
1828                         break;
1829                 case OP_X86_MUL_MEMBASE:
1830                         x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
1831                         break;
1832                 case CEE_BREAK:
1833                         x86_breakpoint (code);
1834                         break;
1835                 case OP_ADDCC:
1836                 case CEE_ADD:
1837                         x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
1838                         break;
1839                 case OP_ADC:
1840                         x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
1841                         break;
1842                 case OP_ADDCC_IMM:
1843                 case OP_ADD_IMM:
1844                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
1845                         break;
1846                 case OP_ADC_IMM:
1847                         x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
1848                         break;
1849                 case OP_SUBCC:
1850                 case CEE_SUB:
1851                         x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
1852                         break;
1853                 case OP_SBB:
1854                         x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
1855                         break;
1856                 case OP_SUBCC_IMM:
1857                 case OP_SUB_IMM:
1858                         x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
1859                         break;
1860                 case OP_SBB_IMM:
1861                         x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
1862                         break;
1863                 case CEE_AND:
1864                         x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
1865                         break;
1866                 case OP_AND_IMM:
1867                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
1868                         break;
1869                 case CEE_DIV:
1870                         x86_cdq (code);
1871                         x86_div_reg (code, ins->sreg2, TRUE);
1872                         break;
1873                 case CEE_DIV_UN:
1874                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1875                         x86_div_reg (code, ins->sreg2, FALSE);
1876                         break;
1877                 case OP_DIV_IMM:
1878                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1879                         x86_cdq (code);
1880                         x86_div_reg (code, ins->sreg2, TRUE);
1881                         break;
1882                 case CEE_REM:
1883                         x86_cdq (code);
1884                         x86_div_reg (code, ins->sreg2, TRUE);
1885                         break;
1886                 case CEE_REM_UN:
1887                         x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
1888                         x86_div_reg (code, ins->sreg2, FALSE);
1889                         break;
1890                 case OP_REM_IMM:
1891                         x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
1892                         x86_cdq (code);
1893                         x86_div_reg (code, ins->sreg2, TRUE);
1894                         break;
1895                 case CEE_OR:
1896                         x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
1897                         break;
1898                 case OP_OR_IMM:
1899                         x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
1900                         break;
1901                 case CEE_XOR:
1902                         x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
1903                         break;
1904                 case OP_XOR_IMM:
1905                         x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
1906                         break;
1907                 case CEE_SHL:
1908                         g_assert (ins->sreg2 == X86_ECX);
1909                         x86_shift_reg (code, X86_SHL, ins->dreg);
1910                         break;
1911                 case CEE_SHR:
1912                         g_assert (ins->sreg2 == X86_ECX);
1913                         x86_shift_reg (code, X86_SAR, ins->dreg);
1914                         break;
1915                 case OP_SHR_IMM:
1916                         x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
1917                         break;
1918                 case OP_SHR_UN_IMM:
1919                         x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
1920                         break;
1921                 case CEE_SHR_UN:
1922                         g_assert (ins->sreg2 == X86_ECX);
1923                         x86_shift_reg (code, X86_SHR, ins->dreg);
1924                         break;
1925                 case OP_SHL_IMM:
1926                         x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
1927                         break;
1928                 case OP_LSHL: {
1929                         guint8 *jump_to_end;
1930
1931                         /* handle shifts below 32 bits */
1932                         x86_shld_reg (code, ins->unused, ins->sreg1);
1933                         x86_shift_reg (code, X86_SHL, ins->sreg1);
1934
1935                         x86_test_reg_imm (code, X86_ECX, 32);
1936                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
1937
1938                         /* handle shift over 32 bit */
1939                         x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
1940                         x86_clear_reg (code, ins->sreg1);
1941                         
1942                         x86_patch (jump_to_end, code);
1943                         }
1944                         break;
1945                 case OP_LSHR: {
1946                         guint8 *jump_to_end;
1947
1948                         /* handle shifts below 32 bits */
1949                         x86_shrd_reg (code, ins->sreg1, ins->unused);
1950                         x86_shift_reg (code, X86_SAR, ins->unused);
1951
1952                         x86_test_reg_imm (code, X86_ECX, 32);
1953                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
1954
1955                         /* handle shifts over 31 bits */
1956                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
1957                         x86_shift_reg_imm (code, X86_SAR, ins->unused, 31);
1958                         
1959                         x86_patch (jump_to_end, code);
1960                         }
1961                         break;
1962                 case OP_LSHR_UN: {
1963                         guint8 *jump_to_end;
1964
1965                         /* handle shifts below 32 bits */
1966                         x86_shrd_reg (code, ins->sreg1, ins->unused);
1967                         x86_shift_reg (code, X86_SHR, ins->unused);
1968
1969                         x86_test_reg_imm (code, X86_ECX, 32);
1970                         jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
1971
1972                         /* handle shifts over 31 bits */
1973                         x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
1974                         x86_shift_reg_imm (code, X86_SHR, ins->unused, 31);
1975                         
1976                         x86_patch (jump_to_end, code);
1977                         }
1978                         break;
1979                 case OP_LSHL_IMM:
1980                         if (ins->inst_imm >= 32) {
1981                                 x86_mov_reg_reg (code, ins->unused, ins->sreg1, 4);
1982                                 x86_clear_reg (code, ins->sreg1);
1983                                 x86_shift_reg_imm (code, X86_SHL, ins->unused, ins->inst_imm - 32);
1984                         } else {
1985                                 x86_shld_reg_imm (code, ins->unused, ins->sreg1, ins->inst_imm);
1986                                 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
1987                         }
1988                         break;
1989                 case OP_LSHR_IMM:
1990                         if (ins->inst_imm >= 32) {
1991                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused,  4);
1992                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, 0x1f);
1993                                 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
1994                         } else {
1995                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
1996                                 x86_shift_reg_imm (code, X86_SAR, ins->unused, ins->inst_imm);
1997                         }
1998                         break;
1999                 case OP_LSHR_UN_IMM:
2000                         if (ins->inst_imm >= 32) {
2001                                 x86_mov_reg_reg (code, ins->sreg1, ins->unused, 4);
2002                                 x86_clear_reg (code, ins->unused);
2003                                 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2004                         } else {
2005                                 x86_shrd_reg_imm (code, ins->sreg1, ins->unused, ins->inst_imm);
2006                                 x86_shift_reg_imm (code, X86_SHR, ins->unused, ins->inst_imm);
2007                         }
2008                         break;
2009                 case CEE_NOT:
2010                         x86_not_reg (code, ins->sreg1);
2011                         break;
2012                 case CEE_NEG:
2013                         x86_neg_reg (code, ins->sreg1);
2014                         break;
2015                 case OP_SEXT_I1:
2016                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2017                         break;
2018                 case OP_SEXT_I2:
2019                         x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2020                         break;
2021                 case CEE_MUL:
2022                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2023                         break;
2024                 case OP_MUL_IMM:
2025                         switch (ins->inst_imm) {
2026                         case 2:
2027                                 /* MOV r1, r2 */
2028                                 /* ADD r1, r1 */
2029                                 if (ins->dreg != ins->sreg1)
2030                                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2031                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2032                                 break;
2033                         case 3:
2034                                 /* LEA r1, [r2 + r2*2] */
2035                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2036                                 break;
2037                         case 5:
2038                                 /* LEA r1, [r2 + r2*4] */
2039                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2040                                 break;
2041                         case 6:
2042                                 /* LEA r1, [r2 + r2*2] */
2043                                 /* ADD r1, r1          */
2044                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2045                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2046                                 break;
2047                         case 9:
2048                                 /* LEA r1, [r2 + r2*8] */
2049                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2050                                 break;
2051                         case 10:
2052                                 /* LEA r1, [r2 + r2*4] */
2053                                 /* ADD r1, r1          */
2054                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2055                                 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2056                                 break;
2057                         case 12:
2058                                 /* LEA r1, [r2 + r2*2] */
2059                                 /* SHL r1, 2           */
2060                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2061                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2062                                 break;
2063                         case 25:
2064                                 /* LEA r1, [r2 + r2*4] */
2065                                 /* LEA r1, [r1 + r1*4] */
2066                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2067                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2068                                 break;
2069                         case 100:
2070                                 /* LEA r1, [r2 + r2*4] */
2071                                 /* SHL r1, 2           */
2072                                 /* LEA r1, [r1 + r1*4] */
2073                                 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2074                                 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2075                                 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2076                                 break;
2077                         default:
2078                                 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2079                                 break;
2080                         }
2081                         break;
2082                 case CEE_MUL_OVF:
2083                         x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2084                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2085                         break;
2086                 case CEE_MUL_OVF_UN: {
2087                         /* the mul operation and the exception check should most likely be split */
2088                         int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2089                         /*g_assert (ins->sreg2 == X86_EAX);
2090                         g_assert (ins->dreg == X86_EAX);*/
2091                         if (ins->sreg2 == X86_EAX) {
2092                                 non_eax_reg = ins->sreg1;
2093                         } else if (ins->sreg1 == X86_EAX) {
2094                                 non_eax_reg = ins->sreg2;
2095                         } else {
2096                                 /* no need to save since we're going to store to it anyway */
2097                                 if (ins->dreg != X86_EAX) {
2098                                         saved_eax = TRUE;
2099                                         x86_push_reg (code, X86_EAX);
2100                                 }
2101                                 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2102                                 non_eax_reg = ins->sreg2;
2103                         }
2104                         if (ins->dreg == X86_EDX) {
2105                                 if (!saved_eax) {
2106                                         saved_eax = TRUE;
2107                                         x86_push_reg (code, X86_EAX);
2108                                 }
2109                         } else if (ins->dreg != X86_EAX) {
2110                                 saved_edx = TRUE;
2111                                 x86_push_reg (code, X86_EDX);
2112                         }
2113                         x86_mul_reg (code, non_eax_reg, FALSE);
2114                         /* save before the check since pop and mov don't change the flags */
2115                         if (ins->dreg != X86_EAX)
2116                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2117                         if (saved_edx)
2118                                 x86_pop_reg (code, X86_EDX);
2119                         if (saved_eax)
2120                                 x86_pop_reg (code, X86_EAX);
2121                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2122                         break;
2123                 }
2124                 case OP_ICONST:
2125                         x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2126                         break;
2127                 case OP_AOTCONST:
2128                         g_assert_not_reached ();
2129                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2130                         x86_mov_reg_imm (code, ins->dreg, 0);
2131                         break;
2132                 case OP_LOAD_GOTADDR:
2133                         x86_call_imm (code, 0);
2134                         /* 
2135                          * The patch needs to point to the pop, since the GOT offset needs 
2136                          * to be added to that address.
2137                          */
2138                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2139                         x86_pop_reg (code, ins->dreg);
2140                         x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2141                         break;
2142                 case OP_GOT_ENTRY:
2143                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2144                         x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2145                         break;
2146                 case OP_X86_PUSH_GOT_ENTRY:
2147                         mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2148                         x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2149                         break;
2150                 case CEE_CONV_I4:
2151                 case OP_MOVE:
2152                         x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2153                         break;
2154                 case CEE_CONV_U4:
2155                         g_assert_not_reached ();
2156                 case CEE_JMP: {
2157                         /*
2158                          * Note: this 'frame destruction' logic is useful for tail calls, too.
2159                          * Keep in sync with the code in emit_epilog.
2160                          */
2161                         int pos = 0;
2162
2163                         /* FIXME: no tracing support... */
2164                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2165                                 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2166                         /* reset offset to make max_len work */
2167                         offset = code - cfg->native_code;
2168
2169                         g_assert (!cfg->method->save_lmf);
2170
2171                         if (cfg->used_int_regs & (1 << X86_EBX))
2172                                 pos -= 4;
2173                         if (cfg->used_int_regs & (1 << X86_EDI))
2174                                 pos -= 4;
2175                         if (cfg->used_int_regs & (1 << X86_ESI))
2176                                 pos -= 4;
2177                         if (pos)
2178                                 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2179         
2180                         if (cfg->used_int_regs & (1 << X86_ESI))
2181                                 x86_pop_reg (code, X86_ESI);
2182                         if (cfg->used_int_regs & (1 << X86_EDI))
2183                                 x86_pop_reg (code, X86_EDI);
2184                         if (cfg->used_int_regs & (1 << X86_EBX))
2185                                 x86_pop_reg (code, X86_EBX);
2186         
2187                         /* restore ESP/EBP */
2188                         x86_leave (code);
2189                         offset = code - cfg->native_code;
2190                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2191                         x86_jump32 (code, 0);
2192                         break;
2193                 }
2194                 case OP_CHECK_THIS:
2195                         /* ensure ins->sreg1 is not NULL
2196                          * note that cmp DWORD PTR [eax], eax is one byte shorter than
2197                          * cmp DWORD PTR [eax], 0
2198                          */
2199                         x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2200                         break;
2201                 case OP_ARGLIST: {
2202                         int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2203                         x86_push_reg (code, hreg);
2204                         x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2205                         x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2206                         x86_pop_reg (code, hreg);
2207                         break;
2208                 }
2209                 case OP_FCALL:
2210                 case OP_LCALL:
2211                 case OP_VCALL:
2212                 case OP_VOIDCALL:
2213                 case CEE_CALL:
2214                         call = (MonoCallInst*)ins;
2215                         if (ins->flags & MONO_INST_HAS_METHOD)
2216                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2217                         else
2218                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2219                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2220                                 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2221                                  * bytes to pop, we want to use pops. GCC does this (note it won't happen
2222                                  * for P4 or i686 because gcc will avoid using pop push at all). But we aren't
2223                                  * smart enough to do that optimization yet
2224                                  *
2225                                  * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2226                                  * mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2227                                  * win (most likely from locality benefits). People with other processors should
2228                                  * check on theirs to see what happens.
2229                                  */
2230                                 if (call->stack_usage == 4) {
2231                                         /* we want to use registers that won't get used soon, so use
2232                                          * ecx, as eax will get allocated first. edx is used by long calls,
2233                                          * so we can't use that.
2234                                          */
2235                                         
2236                                         x86_pop_reg (code, X86_ECX);
2237                                 } else {
2238                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2239                                 }
2240                         }
2241                         code = emit_move_return_value (cfg, ins, code);
2242                         break;
2243                 case OP_FCALL_REG:
2244                 case OP_LCALL_REG:
2245                 case OP_VCALL_REG:
2246                 case OP_VOIDCALL_REG:
2247                 case OP_CALL_REG:
2248                         call = (MonoCallInst*)ins;
2249                         x86_call_reg (code, ins->sreg1);
2250                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2251                                 if (call->stack_usage == 4)
2252                                         x86_pop_reg (code, X86_ECX);
2253                                 else
2254                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2255                         }
2256                         code = emit_move_return_value (cfg, ins, code);
2257                         break;
2258                 case OP_FCALL_MEMBASE:
2259                 case OP_LCALL_MEMBASE:
2260                 case OP_VCALL_MEMBASE:
2261                 case OP_VOIDCALL_MEMBASE:
2262                 case OP_CALL_MEMBASE:
2263                         call = (MonoCallInst*)ins;
2264                         x86_call_membase (code, ins->sreg1, ins->inst_offset);
2265                         if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2266                                 if (call->stack_usage == 4)
2267                                         x86_pop_reg (code, X86_ECX);
2268                                 else
2269                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2270                         }
2271                         code = emit_move_return_value (cfg, ins, code);
2272                         break;
2273                 case OP_OUTARG:
2274                 case OP_X86_PUSH:
2275                         x86_push_reg (code, ins->sreg1);
2276                         break;
2277                 case OP_X86_PUSH_IMM:
2278                         x86_push_imm (code, ins->inst_imm);
2279                         break;
2280                 case OP_X86_PUSH_MEMBASE:
2281                         x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2282                         break;
2283                 case OP_X86_PUSH_OBJ: 
2284                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2285                         x86_push_reg (code, X86_EDI);
2286                         x86_push_reg (code, X86_ESI);
2287                         x86_push_reg (code, X86_ECX);
2288                         if (ins->inst_offset)
2289                                 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2290                         else
2291                                 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2292                         x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2293                         x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2294                         x86_cld (code);
2295                         x86_prefix (code, X86_REP_PREFIX);
2296                         x86_movsd (code);
2297                         x86_pop_reg (code, X86_ECX);
2298                         x86_pop_reg (code, X86_ESI);
2299                         x86_pop_reg (code, X86_EDI);
2300                         break;
2301                 case OP_X86_LEA:
2302                         x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->unused);
2303                         break;
2304                 case OP_X86_LEA_MEMBASE:
2305                         x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2306                         break;
2307                 case OP_X86_XCHG:
2308                         x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2309                         break;
2310                 case OP_LOCALLOC:
2311                         /* keep alignment */
2312                         x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
2313                         x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
2314                         code = mono_emit_stack_alloc (code, ins);
2315                         x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2316                         break;
2317                 case CEE_RET:
2318                         x86_ret (code);
2319                         break;
2320                 case CEE_THROW: {
2321                         x86_push_reg (code, ins->sreg1);
2322                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2323                                                           (gpointer)"mono_arch_throw_exception");
2324                         break;
2325                 }
2326                 case OP_RETHROW: {
2327                         x86_push_reg (code, ins->sreg1);
2328                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
2329                                                           (gpointer)"mono_arch_rethrow_exception");
2330                         break;
2331                 }
2332                 case OP_CALL_HANDLER: 
2333                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2334                         x86_call_imm (code, 0);
2335                         break;
2336                 case OP_LABEL:
2337                         ins->inst_c0 = code - cfg->native_code;
2338                         break;
2339                 case CEE_BR:
2340                         //g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
2341                         //if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
2342                         //break;
2343                         if (ins->flags & MONO_INST_BRLABEL) {
2344                                 if (ins->inst_i0->inst_c0) {
2345                                         x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2346                                 } else {
2347                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2348                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2349                                             x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2350                                                 x86_jump8 (code, 0);
2351                                         else 
2352                                                 x86_jump32 (code, 0);
2353                                 }
2354                         } else {
2355                                 if (ins->inst_target_bb->native_offset) {
2356                                         x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); 
2357                                 } else {
2358                                         mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2359                                         if ((cfg->opt & MONO_OPT_BRANCH) &&
2360                                             x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2361                                                 x86_jump8 (code, 0);
2362                                         else 
2363                                                 x86_jump32 (code, 0);
2364                                 } 
2365                         }
2366                         break;
2367                 case OP_BR_REG:
2368                         x86_jump_reg (code, ins->sreg1);
2369                         break;
2370                 case OP_CEQ:
2371                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2372                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2373                         break;
2374                 case OP_CLT:
2375                         x86_set_reg (code, X86_CC_LT, ins->dreg, TRUE);
2376                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2377                         break;
2378                 case OP_CLT_UN:
2379                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2380                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2381                         break;
2382                 case OP_CGT:
2383                         x86_set_reg (code, X86_CC_GT, ins->dreg, TRUE);
2384                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2385                         break;
2386                 case OP_CGT_UN:
2387                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2388                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2389                         break;
2390                 case OP_CNE:
2391                         x86_set_reg (code, X86_CC_NE, ins->dreg, TRUE);
2392                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2393                         break;
2394                 case OP_COND_EXC_EQ:
2395                 case OP_COND_EXC_NE_UN:
2396                 case OP_COND_EXC_LT:
2397                 case OP_COND_EXC_LT_UN:
2398                 case OP_COND_EXC_GT:
2399                 case OP_COND_EXC_GT_UN:
2400                 case OP_COND_EXC_GE:
2401                 case OP_COND_EXC_GE_UN:
2402                 case OP_COND_EXC_LE:
2403                 case OP_COND_EXC_LE_UN:
2404                 case OP_COND_EXC_OV:
2405                 case OP_COND_EXC_NO:
2406                 case OP_COND_EXC_C:
2407                 case OP_COND_EXC_NC:
2408                         EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], 
2409                                                     (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2410                         break;
2411                 case CEE_BEQ:
2412                 case CEE_BNE_UN:
2413                 case CEE_BLT:
2414                 case CEE_BLT_UN:
2415                 case CEE_BGT:
2416                 case CEE_BGT_UN:
2417                 case CEE_BGE:
2418                 case CEE_BGE_UN:
2419                 case CEE_BLE:
2420                 case CEE_BLE_UN:
2421                         EMIT_COND_BRANCH (ins, branch_cc_table [ins->opcode - CEE_BEQ], (ins->opcode < CEE_BNE_UN));
2422                         break;
2423
2424                 /* floating point opcodes */
2425                 case OP_R8CONST: {
2426                         double d = *(double *)ins->inst_p0;
2427
2428                         if ((d == 0.0) && (mono_signbit (d) == 0)) {
2429                                 x86_fldz (code);
2430                         } else if (d == 1.0) {
2431                                 x86_fld1 (code);
2432                         } else {
2433                                 if (cfg->compile_aot) {
2434                                         guint32 *val = (guint32*)&d;
2435                                         x86_push_imm (code, val [1]);
2436                                         x86_push_imm (code, val [0]);
2437                                         x86_fld_membase (code, X86_ESP, 0, TRUE);
2438                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2439                                 }
2440                                 else {
2441                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2442                                         x86_fld (code, NULL, TRUE);
2443                                 }
2444                         }
2445                         break;
2446                 }
2447                 case OP_R4CONST: {
2448                         float f = *(float *)ins->inst_p0;
2449
2450                         if ((f == 0.0) && (mono_signbit (f) == 0)) {
2451                                 x86_fldz (code);
2452                         } else if (f == 1.0) {
2453                                 x86_fld1 (code);
2454                         } else {
2455                                 if (cfg->compile_aot) {
2456                                         guint32 val = *(guint32*)&f;
2457                                         x86_push_imm (code, val);
2458                                         x86_fld_membase (code, X86_ESP, 0, FALSE);
2459                                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2460                                 }
2461                                 else {
2462                                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2463                                         x86_fld (code, NULL, FALSE);
2464                                 }
2465                         }
2466                         break;
2467                 }
2468                 case OP_STORER8_MEMBASE_REG:
2469                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2470                         break;
2471                 case OP_LOADR8_SPILL_MEMBASE:
2472                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2473                         x86_fxch (code, 1);
2474                         break;
2475                 case OP_LOADR8_MEMBASE:
2476                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2477                         break;
2478                 case OP_STORER4_MEMBASE_REG:
2479                         x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
2480                         break;
2481                 case OP_LOADR4_MEMBASE:
2482                         x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2483                         break;
2484                 case CEE_CONV_R4: /* FIXME: change precision */
2485                 case CEE_CONV_R8:
2486                         x86_push_reg (code, ins->sreg1);
2487                         x86_fild_membase (code, X86_ESP, 0, FALSE);
2488                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2489                         break;
2490                 case OP_X86_FP_LOAD_I8:
2491                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
2492                         break;
2493                 case OP_X86_FP_LOAD_I4:
2494                         x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
2495                         break;
2496                 case OP_FCONV_TO_I1:
2497                         code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
2498                         break;
2499                 case OP_FCONV_TO_U1:
2500                         code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
2501                         break;
2502                 case OP_FCONV_TO_I2:
2503                         code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
2504                         break;
2505                 case OP_FCONV_TO_U2:
2506                         code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
2507                         break;
2508                 case OP_FCONV_TO_I4:
2509                 case OP_FCONV_TO_I:
2510                         code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
2511                         break;
2512                 case OP_FCONV_TO_I8:
2513                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
2514                         x86_fnstcw_membase(code, X86_ESP, 0);
2515                         x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
2516                         x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
2517                         x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
2518                         x86_fldcw_membase (code, X86_ESP, 2);
2519                         x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
2520                         x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
2521                         x86_pop_reg (code, ins->dreg);
2522                         x86_pop_reg (code, ins->unused);
2523                         x86_fldcw_membase (code, X86_ESP, 0);
2524                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2525                         break;
2526                 case OP_LCONV_TO_R_UN: { 
2527                         static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
2528                         guint8 *br;
2529
2530                         /* load 64bit integer to FP stack */
2531                         x86_push_imm (code, 0);
2532                         x86_push_reg (code, ins->sreg2);
2533                         x86_push_reg (code, ins->sreg1);
2534                         x86_fild_membase (code, X86_ESP, 0, TRUE);
2535                         /* store as 80bit FP value */
2536                         x86_fst80_membase (code, X86_ESP, 0);
2537                         
2538                         /* test if lreg is negative */
2539                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2540                         br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
2541         
2542                         /* add correction constant mn */
2543                         x86_fld80_mem (code, mn);
2544                         x86_fld80_membase (code, X86_ESP, 0);
2545                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2546                         x86_fst80_membase (code, X86_ESP, 0);
2547
2548                         x86_patch (br, code);
2549
2550                         x86_fld80_membase (code, X86_ESP, 0);
2551                         x86_alu_reg_imm (code, X86_ADD, X86_ESP, 12);
2552
2553                         break;
2554                 }
2555                 case OP_LCONV_TO_OVF_I: {
2556                         guint8 *br [3], *label [1];
2557
2558                         /* 
2559                          * Valid ints: 0xffffffff:80000000 to 00000000:7fffffff
2560                          */
2561                         x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2562
2563                         /* If the low word top bit is set, see if we are negative */
2564                         br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
2565                         /* We are not negative (no top bit set), check for our top word to be zero */
2566                         x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
2567                         br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2568                         label [0] = code;
2569
2570                         /* throw exception */
2571                         mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
2572                         x86_jump32 (code, 0);
2573         
2574                         x86_patch (br [0], code);
2575                         /* our top bit is set, check that top word is 0xfffffff */
2576                         x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
2577                 
2578                         x86_patch (br [1], code);
2579                         /* nope, emit exception */
2580                         br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
2581                         x86_patch (br [2], label [0]);
2582
2583                         if (ins->dreg != ins->sreg1)
2584                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2585                         break;
2586                 }
2587                 case OP_FADD:
2588                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2589                         break;
2590                 case OP_FSUB:
2591                         x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
2592                         break;          
2593                 case OP_FMUL:
2594                         x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
2595                         break;          
2596                 case OP_FDIV:
2597                         x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
2598                         break;          
2599                 case OP_FNEG:
2600                         x86_fchs (code);
2601                         break;          
2602                 case OP_SIN:
2603                         x86_fsin (code);
2604                         x86_fldz (code);
2605                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2606                         break;          
2607                 case OP_COS:
2608                         x86_fcos (code);
2609                         x86_fldz (code);
2610                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2611                         break;          
2612                 case OP_ABS:
2613                         x86_fabs (code);
2614                         break;          
2615                 case OP_TAN: {
2616                         /* 
2617                          * it really doesn't make sense to inline all this code,
2618                          * it's here just to show that things may not be as simple 
2619                          * as they appear.
2620                          */
2621                         guchar *check_pos, *end_tan, *pop_jump;
2622                         x86_push_reg (code, X86_EAX);
2623                         x86_fptan (code);
2624                         x86_fnstsw (code);
2625                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2626                         check_pos = code;
2627                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2628                         x86_fstp (code, 0); /* pop the 1.0 */
2629                         end_tan = code;
2630                         x86_jump8 (code, 0);
2631                         x86_fldpi (code);
2632                         x86_fp_op (code, X86_FADD, 0);
2633                         x86_fxch (code, 1);
2634                         x86_fprem1 (code);
2635                         x86_fstsw (code);
2636                         x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
2637                         pop_jump = code;
2638                         x86_branch8 (code, X86_CC_NE, 0, FALSE);
2639                         x86_fstp (code, 1);
2640                         x86_fptan (code);
2641                         x86_patch (pop_jump, code);
2642                         x86_fstp (code, 0); /* pop the 1.0 */
2643                         x86_patch (check_pos, code);
2644                         x86_patch (end_tan, code);
2645                         x86_fldz (code);
2646                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2647                         x86_pop_reg (code, X86_EAX);
2648                         break;
2649                 }
2650                 case OP_ATAN:
2651                         x86_fld1 (code);
2652                         x86_fpatan (code);
2653                         x86_fldz (code);
2654                         x86_fp_op_reg (code, X86_FADD, 1, TRUE);
2655                         break;          
2656                 case OP_SQRT:
2657                         x86_fsqrt (code);
2658                         break;          
2659                 case OP_X86_FPOP:
2660                         x86_fstp (code, 0);
2661                         break;          
2662                 case OP_FREM: {
2663                         guint8 *l1, *l2;
2664
2665                         x86_push_reg (code, X86_EAX);
2666                         /* we need to exchange ST(0) with ST(1) */
2667                         x86_fxch (code, 1);
2668
2669                         /* this requires a loop, because fprem sometimes 
2670                          * returns a partial remainder */
2671                         l1 = code;
2672                         /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
2673                         /* x86_fprem1 (code); */
2674                         x86_fprem (code);
2675                         x86_fnstsw (code);
2676                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
2677                         l2 = code + 2;
2678                         x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
2679
2680                         /* pop result */
2681                         x86_fstp (code, 1);
2682
2683                         x86_pop_reg (code, X86_EAX);
2684                         break;
2685                 }
2686                 case OP_FCOMPARE:
2687                         if (cfg->opt & MONO_OPT_FCMOV) {
2688                                 x86_fcomip (code, 1);
2689                                 x86_fstp (code, 0);
2690                                 break;
2691                         }
2692                         /* this overwrites EAX */
2693                         EMIT_FPCOMPARE(code);
2694                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2695                         break;
2696                 case OP_FCEQ:
2697                         if (cfg->opt & MONO_OPT_FCMOV) {
2698                                 /* zeroing the register at the start results in 
2699                                  * shorter and faster code (we can also remove the widening op)
2700                                  */
2701                                 guchar *unordered_check;
2702                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2703                                 x86_fcomip (code, 1);
2704                                 x86_fstp (code, 0);
2705                                 unordered_check = code;
2706                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2707                                 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
2708                                 x86_patch (unordered_check, code);
2709                                 break;
2710                         }
2711                         if (ins->dreg != X86_EAX) 
2712                                 x86_push_reg (code, X86_EAX);
2713
2714                         EMIT_FPCOMPARE(code);
2715                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2716                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2717                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2718                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2719
2720                         if (ins->dreg != X86_EAX) 
2721                                 x86_pop_reg (code, X86_EAX);
2722                         break;
2723                 case OP_FCLT:
2724                 case OP_FCLT_UN:
2725                         if (cfg->opt & MONO_OPT_FCMOV) {
2726                                 /* zeroing the register at the start results in 
2727                                  * shorter and faster code (we can also remove the widening op)
2728                                  */
2729                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2730                                 x86_fcomip (code, 1);
2731                                 x86_fstp (code, 0);
2732                                 if (ins->opcode == OP_FCLT_UN) {
2733                                         guchar *unordered_check = code;
2734                                         guchar *jump_to_end;
2735                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2736                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2737                                         jump_to_end = code;
2738                                         x86_jump8 (code, 0);
2739                                         x86_patch (unordered_check, code);
2740                                         x86_inc_reg (code, ins->dreg);
2741                                         x86_patch (jump_to_end, code);
2742                                 } else {
2743                                         x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
2744                                 }
2745                                 break;
2746                         }
2747                         if (ins->dreg != X86_EAX) 
2748                                 x86_push_reg (code, X86_EAX);
2749
2750                         EMIT_FPCOMPARE(code);
2751                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2752                         if (ins->opcode == OP_FCLT_UN) {
2753                                 guchar *is_not_zero_check, *end_jump;
2754                                 is_not_zero_check = code;
2755                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2756                                 end_jump = code;
2757                                 x86_jump8 (code, 0);
2758                                 x86_patch (is_not_zero_check, code);
2759                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2760
2761                                 x86_patch (end_jump, code);
2762                         }
2763                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2764                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2765
2766                         if (ins->dreg != X86_EAX) 
2767                                 x86_pop_reg (code, X86_EAX);
2768                         break;
2769                 case OP_FCGT:
2770                 case OP_FCGT_UN:
2771                         if (cfg->opt & MONO_OPT_FCMOV) {
2772                                 /* zeroing the register at the start results in 
2773                                  * shorter and faster code (we can also remove the widening op)
2774                                  */
2775                                 guchar *unordered_check;
2776                                 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
2777                                 x86_fcomip (code, 1);
2778                                 x86_fstp (code, 0);
2779                                 if (ins->opcode == OP_FCGT) {
2780                                         unordered_check = code;
2781                                         x86_branch8 (code, X86_CC_P, 0, FALSE);
2782                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2783                                         x86_patch (unordered_check, code);
2784                                 } else {
2785                                         x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
2786                                 }
2787                                 break;
2788                         }
2789                         if (ins->dreg != X86_EAX) 
2790                                 x86_push_reg (code, X86_EAX);
2791
2792                         EMIT_FPCOMPARE(code);
2793                         x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
2794                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2795                         if (ins->opcode == OP_FCGT_UN) {
2796                                 guchar *is_not_zero_check, *end_jump;
2797                                 is_not_zero_check = code;
2798                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2799                                 end_jump = code;
2800                                 x86_jump8 (code, 0);
2801                                 x86_patch (is_not_zero_check, code);
2802                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2803         
2804                                 x86_patch (end_jump, code);
2805                         }
2806                         x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
2807                         x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2808
2809                         if (ins->dreg != X86_EAX) 
2810                                 x86_pop_reg (code, X86_EAX);
2811                         break;
2812                 case OP_FBEQ:
2813                         if (cfg->opt & MONO_OPT_FCMOV) {
2814                                 guchar *jump = code;
2815                                 x86_branch8 (code, X86_CC_P, 0, TRUE);
2816                                 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2817                                 x86_patch (jump, code);
2818                                 break;
2819                         }
2820                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
2821                         EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
2822                         break;
2823                 case OP_FBNE_UN:
2824                         /* Branch if C013 != 100 */
2825                         if (cfg->opt & MONO_OPT_FCMOV) {
2826                                 /* branch if !ZF or (PF|CF) */
2827                                 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2828                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2829                                 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
2830                                 break;
2831                         }
2832                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2833                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2834                         break;
2835                 case OP_FBLT:
2836                         if (cfg->opt & MONO_OPT_FCMOV) {
2837                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2838                                 break;
2839                         }
2840                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2841                         break;
2842                 case OP_FBLT_UN:
2843                         if (cfg->opt & MONO_OPT_FCMOV) {
2844                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2845                                 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
2846                                 break;
2847                         }
2848                         if (ins->opcode == OP_FBLT_UN) {
2849                                 guchar *is_not_zero_check, *end_jump;
2850                                 is_not_zero_check = code;
2851                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2852                                 end_jump = code;
2853                                 x86_jump8 (code, 0);
2854                                 x86_patch (is_not_zero_check, code);
2855                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2856
2857                                 x86_patch (end_jump, code);
2858                         }
2859                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2860                         break;
2861                 case OP_FBGT:
2862                 case OP_FBGT_UN:
2863                         if (cfg->opt & MONO_OPT_FCMOV) {
2864                                 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
2865                                 break;
2866                         }
2867                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2868                         if (ins->opcode == OP_FBGT_UN) {
2869                                 guchar *is_not_zero_check, *end_jump;
2870                                 is_not_zero_check = code;
2871                                 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
2872                                 end_jump = code;
2873                                 x86_jump8 (code, 0);
2874                                 x86_patch (is_not_zero_check, code);
2875                                 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
2876
2877                                 x86_patch (end_jump, code);
2878                         }
2879                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2880                         break;
2881                 case OP_FBGE:
2882                         /* Branch if C013 == 100 or 001 */
2883                         if (cfg->opt & MONO_OPT_FCMOV) {
2884                                 guchar *br1;
2885
2886                                 /* skip branch if C1=1 */
2887                                 br1 = code;
2888                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2889                                 /* branch if (C0 | C3) = 1 */
2890                                 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
2891                                 x86_patch (br1, code);
2892                                 break;
2893                         }
2894                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2895                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2896                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
2897                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2898                         break;
2899                 case OP_FBGE_UN:
2900                         /* Branch if C013 == 000 */
2901                         if (cfg->opt & MONO_OPT_FCMOV) {
2902                                 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
2903                                 break;
2904                         }
2905                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2906                         break;
2907                 case OP_FBLE:
2908                         /* Branch if C013=000 or 100 */
2909                         if (cfg->opt & MONO_OPT_FCMOV) {
2910                                 guchar *br1;
2911
2912                                 /* skip branch if C1=1 */
2913                                 br1 = code;
2914                                 x86_branch8 (code, X86_CC_P, 0, FALSE);
2915                                 /* branch if C0=0 */
2916                                 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
2917                                 x86_patch (br1, code);
2918                                 break;
2919                         }
2920                         x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
2921                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
2922                         EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
2923                         break;
2924                 case OP_FBLE_UN:
2925                         /* Branch if C013 != 001 */
2926                         if (cfg->opt & MONO_OPT_FCMOV) {
2927                                 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
2928                                 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
2929                                 break;
2930                         }
2931                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2932                         EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
2933                         break;
2934                 case CEE_CKFINITE: {
2935                         x86_push_reg (code, X86_EAX);
2936                         x86_fxam (code);
2937                         x86_fnstsw (code);
2938                         x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
2939                         x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
2940                         x86_pop_reg (code, X86_EAX);
2941                         EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
2942                         break;
2943                 }
2944                 case OP_TLS_GET: {
2945                         code = emit_tls_get (code, ins->dreg, ins->inst_offset);
2946                         break;
2947                 }
2948                 case OP_MEMORY_BARRIER: {
2949                         /* Not needed on x86 */
2950                         break;
2951                 }
2952                 case OP_ATOMIC_ADD_I4: {
2953                         int dreg = ins->dreg;
2954
2955                         if (dreg == ins->inst_basereg) {
2956                                 x86_push_reg (code, ins->sreg2);
2957                                 dreg = ins->sreg2;
2958                         } 
2959                         
2960                         if (dreg != ins->sreg2)
2961                                 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
2962
2963                         x86_prefix (code, X86_LOCK_PREFIX);
2964                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
2965
2966                         if (dreg != ins->dreg) {
2967                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
2968                                 x86_pop_reg (code, dreg);
2969                         }
2970
2971                         break;
2972                 }
2973                 case OP_ATOMIC_ADD_NEW_I4: {
2974                         int dreg = ins->dreg;
2975
2976                         /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
2977                         if (ins->sreg2 == dreg) {
2978                                 if (dreg == X86_EBX) {
2979                                         dreg = X86_EDI;
2980                                         if (ins->inst_basereg == X86_EDI)
2981                                                 dreg = X86_ESI;
2982                                 } else {
2983                                         dreg = X86_EBX;
2984                                         if (ins->inst_basereg == X86_EBX)
2985                                                 dreg = X86_EDI;
2986                                 }
2987                         } else if (ins->inst_basereg == dreg) {
2988                                 if (dreg == X86_EBX) {
2989                                         dreg = X86_EDI;
2990                                         if (ins->sreg2 == X86_EDI)
2991                                                 dreg = X86_ESI;
2992                                 } else {
2993                                         dreg = X86_EBX;
2994                                         if (ins->sreg2 == X86_EBX)
2995                                                 dreg = X86_EDI;
2996                                 }
2997                         }
2998
2999                         if (dreg != ins->dreg) {
3000                                 x86_push_reg (code, dreg);
3001                         }
3002
3003                         x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3004                         x86_prefix (code, X86_LOCK_PREFIX);
3005                         x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3006                         /* dreg contains the old value, add with sreg2 value */
3007                         x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3008                         
3009                         if (ins->dreg != dreg) {
3010                                 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3011                                 x86_pop_reg (code, dreg);
3012                         }
3013
3014                         break;
3015                 }
3016                 case OP_ATOMIC_EXCHANGE_I4: {
3017                         guchar *br[2];
3018                         int sreg2 = ins->sreg2;
3019                         int breg = ins->inst_basereg;
3020
3021                         /* cmpxchg uses eax as comperand, need to make sure we can use it
3022                          * hack to overcome limits in x86 reg allocator 
3023                          * (req: dreg == eax and sreg2 != eax and breg != eax) 
3024                          */
3025                         if (ins->dreg != X86_EAX)
3026                                 x86_push_reg (code, X86_EAX);
3027                         
3028                         /* We need the EAX reg for the cmpxchg */
3029                         if (ins->sreg2 == X86_EAX) {
3030                                 x86_push_reg (code, X86_EDX);
3031                                 x86_mov_reg_reg (code, X86_EDX, X86_EAX, 4);
3032                                 sreg2 = X86_EDX;
3033                         }
3034
3035                         if (breg == X86_EAX) {
3036                                 x86_push_reg (code, X86_ESI);
3037                                 x86_mov_reg_reg (code, X86_ESI, X86_EAX, 4);
3038                                 breg = X86_ESI;
3039                         }
3040
3041                         x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3042
3043                         br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3044                         x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3045                         br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3046                         x86_patch (br [1], br [0]);
3047
3048                         if (breg != ins->inst_basereg)
3049                                 x86_pop_reg (code, X86_ESI);
3050
3051                         if (ins->dreg != X86_EAX) {
3052                                 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
3053                                 x86_pop_reg (code, X86_EAX);
3054                         }
3055
3056                         if (ins->sreg2 != sreg2)
3057                                 x86_pop_reg (code, X86_EDX);
3058
3059                         break;
3060                 }
3061                 default:
3062                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3063                         g_assert_not_reached ();
3064                 }
3065
3066                 if ((code - cfg->native_code - offset) > max_len) {
3067                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
3068                                    mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
3069                         g_assert_not_reached ();
3070                 }
3071                
3072                 cpos += max_len;
3073
3074                 last_ins = ins;
3075                 last_offset = offset;
3076                 
3077                 ins = ins->next;
3078         }
3079
3080         cfg->code_len = code - cfg->native_code;
3081 }
3082
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Hook for registering architecture-specific low-level helper calls
 * with the runtime. Intentionally empty: no such helpers are needed
 * on x86.
 */
void
mono_arch_register_lowlevel_calls (void)
{
}
3087
3088 void
3089 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3090 {
3091         MonoJumpInfo *patch_info;
3092         gboolean compile_aot = !run_cctors;
3093
3094         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3095                 unsigned char *ip = patch_info->ip.i + code;
3096                 const unsigned char *target;
3097
3098                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3099
3100                 if (compile_aot) {
3101                         switch (patch_info->type) {
3102                         case MONO_PATCH_INFO_BB:
3103                         case MONO_PATCH_INFO_LABEL:
3104                                 break;
3105                         default:
3106                                 /* No need to patch these */
3107                                 continue;
3108                         }
3109                 }
3110
3111                 switch (patch_info->type) {
3112                 case MONO_PATCH_INFO_IP:
3113                         *((gconstpointer *)(ip)) = target;
3114                         break;
3115                 case MONO_PATCH_INFO_CLASS_INIT: {
3116                         guint8 *code = ip;
3117                         /* Might already been changed to a nop */
3118                         x86_call_code (code, 0);
3119                         x86_patch (ip, target);
3120                         break;
3121                 }
3122                 case MONO_PATCH_INFO_ABS:
3123                 case MONO_PATCH_INFO_METHOD:
3124                 case MONO_PATCH_INFO_METHOD_JUMP:
3125                 case MONO_PATCH_INFO_INTERNAL_METHOD:
3126                 case MONO_PATCH_INFO_BB:
3127                 case MONO_PATCH_INFO_LABEL:
3128                         x86_patch (ip, target);
3129                         break;
3130                 case MONO_PATCH_INFO_NONE:
3131                         break;
3132                 default: {
3133                         guint32 offset = mono_arch_get_patch_offset (ip);
3134                         *((gconstpointer *)(ip + offset)) = target;
3135                         break;
3136                 }
3137                 }
3138         }
3139 }
3140
/*
 * mono_arch_emit_prolog:
 *
 *   Emit the native prolog for cfg->method: standard EBP frame setup,
 * optional thread-attach code for native-to-managed wrappers, either an
 * LMF save or pushes of the used callee-saved registers, stack-frame
 * allocation, max_offset computation for short branches, optional trace
 * instrumentation, and loads of register-allocated arguments.
 * Returns the code pointer past the emitted prolog.
 */
guint8 *
mono_arch_emit_prolog (MonoCompile *cfg)
{
	MonoMethod *method = cfg->method;
	MonoBasicBlock *bb;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int alloc_size, pos, max_offset, i;
	guint8 *code;

	/* Initial native-code buffer: 4x the IL size, at least 256 bytes */
	cfg->code_size =  MAX (mono_method_get_header (method)->code_size * 4, 256);
	code = cfg->native_code = g_malloc (cfg->code_size);

	/* Standard frame: push ebp; mov ebp, esp */
	x86_push_reg (code, X86_EBP);
	x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);

	alloc_size = cfg->stack_offset;
	pos = 0;

	if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
		/* Might need to attach the thread to the JIT */
		if (lmf_tls_offset != -1) {
			guint8 *buf;

			/* A non-NULL TLS lmf slot means the thread is already attached */
			code = emit_tls_get ( code, X86_EAX, lmf_tls_offset);
			x86_test_reg_reg (code, X86_EAX, X86_EAX);
			buf = code;
			x86_branch8 (code, X86_CC_NE, 0, 0);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
			x86_patch (buf, code);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
		}
		else {
			/* No fast TLS access: call the attach routine unconditionally */
			g_assert (!cfg->compile_aot);
			x86_push_imm (code, cfg->domain);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
			x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
		}
	}

	if (method->save_lmf) {
		/* Build a MonoLMF structure on the stack via a series of pushes */
		pos += sizeof (MonoLMF);

		/* save the current IP */
		mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
		x86_push_imm_template (code);

		/* save all caller saved regs */
		x86_push_reg (code, X86_EBP);
		x86_push_reg (code, X86_ESI);
		x86_push_reg (code, X86_EDI);
		x86_push_reg (code, X86_EBX);

		/* save method info */
		x86_push_imm (code, method);

		/* get the address of lmf for the current thread */
		/* 
		 * This is performance critical so we try to use some tricks to make
		 * it fast.
		 */
		if (lmf_tls_offset != -1) {
			/* Load lmf quicky using the GS register */
			code = emit_tls_get (code, X86_EAX, lmf_tls_offset);
#ifdef PLATFORM_WIN32
			/* The TLS key actually contains a pointer to the MonoJitTlsData structure */
			/* FIXME: Add a separate key for LMF to avoid this */
			x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
#endif
		}
		else {
			if (cfg->compile_aot) {
				/* The GOT var does not exist yet */
				/* call/pop computes the current IP, then the GOT offset is
				 * added and mono_get_lmf_addr is called through the GOT */
				x86_call_imm (code, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
				x86_pop_reg (code, X86_EAX);
				x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
				mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
				x86_call_membase (code, X86_EAX, 0xf0f0f0f0);
			}
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
		}

		/* push lmf */
		x86_push_reg (code, X86_EAX); 
		/* push *lfm (previous_lmf) */
		x86_push_membase (code, X86_EAX, 0);
		/* *(lmf) = ESP */
		x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
	} else {

		/* No LMF: only save the callee-saved registers actually used */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_push_reg (code, X86_EBX);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_push_reg (code, X86_EDI);
			pos += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_push_reg (code, X86_ESI);
			pos += 4;
		}
	}

	/* the pushes above already account for part of the frame */
	alloc_size -= pos;

	if (alloc_size) {
		/* See mono_emit_stack_alloc */
#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
		/* Grow the stack one page at a time, touching each page so the
		 * OS guard page is hit in order */
		guint32 remaining_size = alloc_size;
		while (remaining_size >= 0x1000) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
			x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
			remaining_size -= 0x1000;
		}
		if (remaining_size)
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
#else
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
#endif
	}

	/* compute max_offset in order to use short forward jumps */
	max_offset = 0;
	if (cfg->opt & MONO_OPT_BRANCH) {
		for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
			MonoInst *ins = bb->code;
			bb->max_offset = max_offset;

			if (cfg->prof_options & MONO_PROFILE_COVERAGE)
				max_offset += 6;
			/* max alignment for loops */
			if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
				max_offset += LOOP_ALIGNMENT;

			while (ins) {
				if (ins->opcode == OP_LABEL)
					ins->inst_c1 = max_offset;
				
				/* worst-case length of this instruction, from cpu-pentium.h */
				max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
				ins = ins->next;
			}
		}
	}

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);

	/* load arguments allocated to register from the stack */
	sig = mono_method_signature (method);
	pos = 0;

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		inst = cfg->varinfo [pos];
		if (inst->opcode == OP_REGVAR) {
			x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
			if (cfg->verbose_level > 2)
				g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
		}
		pos++;
	}

	cfg->code_len = code - cfg->native_code;

	return code;
}
3317
void
mono_arch_emit_epilog (MonoCompile *cfg)
{
	/*
	 * Emit the method epilog: restore the LMF (if the method saved one),
	 * restore callee-saved registers, reload valuetype return values into
	 * registers where required, and emit the final leave/ret.
	 * The register-restore code must be kept in sync with the CEE_JMP
	 * implementation (see comment below).
	 */
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig = mono_method_signature (method);
	int quad, pos;
	guint32 stack_to_pop;
	guint8 *code;
	int max_epilog_size = 16;
	CallInfo *cinfo;
	
	if (cfg->method->save_lmf)
		max_epilog_size += 128;
	
	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	/* Grow the native code buffer until the worst-case epilog fits. */
	while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
		code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);

	/* the code restoring the registers must be kept in sync with CEE_JMP */
	pos = 0;
	
	if (method->save_lmf) {
		gint32 prev_lmf_reg;

		/*
		 * Find a spare register to hold the previous_lmf pointer: EDX is
		 * free unless the method returns a 64 bit value in EAX:EDX, in
		 * which case EDI is used (and marked used so it gets restored).
		 */
		switch (sig->ret->type) {
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
			prev_lmf_reg = X86_EDI;
			cfg->used_int_regs |= (1 << X86_EDI);
			break;
		default:
			prev_lmf_reg = X86_EDX;
			break;
		}

		/* Unlink this frame's MonoLMF from the LMF list:
		 * the fixed EBP-relative offsets below address the LMF area laid
		 * out by the prolog — NOTE(review): confirm against
		 * mono_arch_emit_prolog, which is outside this view. */
		/* reg = previous_lmf */
		x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);

		/* ecx = lmf */
		x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);

		/* *(lmf) = previous_lmf */
		x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);

		/* restore caller saved regs */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
		}

		/* EBP is restored by LEAVE */
	} else {
		/*
		 * No LMF: callee-saved registers were pushed by the prolog just
		 * below the saved EBP. Compute the (negative) offset of the save
		 * area, point ESP at it, then pop in reverse push order.
		 */
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			pos -= 4;
		}
		if (cfg->used_int_regs & (1 << X86_ESI)) {
			pos -= 4;
		}

		if (pos)
			x86_lea_membase (code, X86_ESP, X86_EBP, pos);

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			x86_pop_reg (code, X86_ESI);
		}
		if (cfg->used_int_regs & (1 << X86_EDI)) {
			x86_pop_reg (code, X86_EDI);
		}
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			x86_pop_reg (code, X86_EBX);
		}
	}

	/* Load returned vtypes into registers if needed */
	cinfo = get_call_info (sig, FALSE);
	if (cinfo->ret.storage == ArgValuetypeInReg) {
		/* A small valuetype is returned in up to two "quads" (register
		 * or fp-stack slots); reload each from the local return area. */
		for (quad = 0; quad < 2; quad ++) {
			switch (cinfo->ret.pair_storage [quad]) {
			case ArgInIReg:
				x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
				break;
			case ArgOnFloatFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
				break;
			case ArgOnDoubleFpStack:
				x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
				break;
			case ArgNone:
				break;
			default:
				g_assert_not_reached ();
			}
		}
	}

	x86_leave (code);

	/*
	 * Under stdcall the callee pops its arguments; a struct returned on
	 * the stack via a hidden pointer also requires popping 4 bytes.
	 */
	if (CALLCONV_IS_STDCALL (sig)) {
		MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));

		stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
	} else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
		stack_to_pop = 4;
	else
		stack_to_pop = 0;

	if (stack_to_pop)
		x86_ret_imm (code, stack_to_pop);
	else
		x86_ret (code);

	g_free (cinfo);

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3455
void
mono_arch_emit_exceptions (MonoCompile *cfg)
{
	/*
	 * Emit the out-of-line exception throw sequences referenced by
	 * MONO_PATCH_INFO_EXC patch entries, patching the original branch
	 * sites to jump here. Identical exception classes share one throw
	 * sequence (up to 16 distinct classes are cached).
	 */
	MonoJumpInfo *patch_info;
	int nthrows, i;
	guint8 *code;
	MonoClass *exc_classes [16];
	guint8 *exc_throw_start [16], *exc_throw_end [16];
	guint32 code_size;
	int exc_count = 0;

	/* Compute needed space */
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		if (patch_info->type == MONO_PATCH_INFO_EXC)
			exc_count++;
	}

	/* 
	 * make sure we have enough space for exceptions
	 * 16 is the size of two push_imm instructions and a call
	 */
	if (cfg->compile_aot)
		code_size = exc_count * 32;
	else
		code_size = exc_count * 16;

	while (cfg->code_len + code_size > (cfg->code_size - 16)) {
		cfg->code_size *= 2;
		cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
		mono_jit_stats.code_reallocs++;
	}

	code = cfg->native_code + cfg->code_len;

	nthrows = 0;
	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
		switch (patch_info->type) {
		case MONO_PATCH_INFO_EXC: {
			MonoClass *exc_class;
			guint8 *buf, *buf2;
			guint32 throw_ip;

			/* Redirect the original branch to this throw sequence. */
			x86_patch (patch_info->ip.i + cfg->native_code, code);

			exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
			g_assert (exc_class);
			throw_ip = patch_info->ip.i;

			/* Find a throw sequence for the same exception class */
			for (i = 0; i < nthrows; ++i)
				if (exc_classes [i] == exc_class)
					break;
			if (i < nthrows) {
				/* Reuse the cached sequence: push the IP offset and
				 * jump to the shared throw code. */
				x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
				x86_jump_code (code, exc_throw_start [i]);
				patch_info->type = MONO_PATCH_INFO_NONE;
			}
			else {
				guint32 got_reg = X86_EAX;
				guint32 size;

				/* Compute size of code following the push <OFFSET> */
				if (cfg->compile_aot) {
					size = 5 + 6;
					if (!cfg->got_var)
						size += 32;
					else if (cfg->got_var->opcode == OP_REGOFFSET)
						size += 6;
				}
				else
					size = 5 + 5;

				/*
				 * The pushed value is the distance from the throw site
				 * to the call below; it is not known yet, so push a
				 * placeholder and backpatch it after emitting the call.
				 */
				if ((code - cfg->native_code) - throw_ip < 126 - size) {
					/* Use the shorter form */
					buf = buf2 = code;
					x86_push_imm (code, 0);
				}
				else {
					buf = code;
					x86_push_imm (code, 0xf0f0f0f0);
					buf2 = code;
				}

				if (nthrows < 16) {
					exc_classes [nthrows] = exc_class;
					exc_throw_start [nthrows] = code;
				}

				if (cfg->compile_aot) {
					/*
					 * Since the patches are generated by the back end,
					 * there is no way to generate a got_var at this point.
					 * Compute the GOT address inline instead (call/pop/add).
					 */
					if (!cfg->got_var) {
						x86_call_imm (code, 0);
						mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
						x86_pop_reg (code, X86_EAX);
						x86_alu_reg_imm (code, X86_ADD, X86_EAX, 0);
					}
					else {
						if (cfg->got_var->opcode == OP_REGOFFSET)
							x86_mov_reg_membase (code, X86_EAX, cfg->got_var->inst_basereg, cfg->got_var->inst_offset, 4);
						else
							got_reg = cfg->got_var->dreg;
					}
				}

				/* push <TOKEN>; call mono_arch_throw_corlib_exception */
				x86_push_imm (code, exc_class->type_token);
				patch_info->data.name = "mono_arch_throw_corlib_exception";
				patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
				patch_info->ip.i = code - cfg->native_code;
				if (cfg->compile_aot)
					x86_call_membase (code, got_reg, 0xf0f0f0f0);
				else
					x86_call_code (code, 0);
				/* Backpatch the placeholder push with the real offset;
				 * pad with nops if the short form was not used. */
				x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
				while (buf < buf2)
					x86_nop (buf);

				if (nthrows < 16) {
					exc_throw_end [nthrows] = code;
					nthrows ++;
				}
			}
			break;
		}
		default:
			/* do nothing */
			break;
		}
	}

	cfg->code_len = code - cfg->native_code;

	g_assert (cfg->code_len < cfg->code_size);
}
3591
void
mono_arch_flush_icache (guint8 *code, gint size)
{
	/* not needed: x86 has a coherent instruction cache, so newly
	 * written code is visible without an explicit flush. */
}
3597
void
mono_arch_flush_register_windows (void)
{
	/* No register windows on x86 (this hook exists for SPARC-like archs). */
}
3602
3603 /*
3604  * Support for fast access to the thread-local lmf structure using the GS
3605  * segment register on NPTL + kernel 2.6.x.
3606  */
3607
3608 static gboolean tls_offset_inited = FALSE;
3609
3610 void
3611 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
3612 {
3613         if (!tls_offset_inited) {
3614                 if (!getenv ("MONO_NO_TLS")) {
3615 #ifdef PLATFORM_WIN32
3616                         /* 
3617                          * We need to init this multiple times, since when we are first called, the key might not
3618                          * be initialized yet.
3619                          */
3620                         appdomain_tls_offset = mono_domain_get_tls_key ();
3621                         lmf_tls_offset = mono_get_jit_tls_key ();
3622                         thread_tls_offset = mono_thread_get_tls_key ();
3623
3624                         /* Only 64 tls entries can be accessed using inline code */
3625                         if (appdomain_tls_offset >= 64)
3626                                 appdomain_tls_offset = -1;
3627                         if (lmf_tls_offset >= 64)
3628                                 lmf_tls_offset = -1;
3629                         if (thread_tls_offset >= 64)
3630                                 thread_tls_offset = -1;
3631 #else
3632                         tls_offset_inited = TRUE;
3633                         appdomain_tls_offset = mono_domain_get_tls_offset ();
3634                         lmf_tls_offset = mono_get_lmf_tls_offset ();
3635                         thread_tls_offset = mono_thread_get_tls_offset ();
3636 #endif
3637                 }
3638         }               
3639 }
3640
void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
	/* Nothing arch-specific to release on x86. */
}
3645
3646 void
3647 mono_arch_emit_this_vret_args (MonoCompile *cfg, MonoCallInst *inst, int this_reg, int this_type, int vt_reg)
3648 {
3649         MonoCallInst *call = (MonoCallInst*)inst;
3650         CallInfo *cinfo = get_call_info (inst->signature, FALSE);
3651
3652         /* add the this argument */
3653         if (this_reg != -1) {
3654                 if (cinfo->args [0].storage == ArgInIReg) {
3655                         MonoInst *this;
3656                         MONO_INST_NEW (cfg, this, OP_MOVE);
3657                         this->type = this_type;
3658                         this->sreg1 = this_reg;
3659                         this->dreg = mono_regstate_next_int (cfg->rs);
3660                         mono_bblock_add_inst (cfg->cbb, this);
3661
3662                         mono_call_inst_add_outarg_reg (call, this->dreg, cinfo->args [0].reg, FALSE);
3663                 }
3664                 else {
3665                         MonoInst *this;
3666                         MONO_INST_NEW (cfg, this, OP_OUTARG);
3667                         this->type = this_type;
3668                         this->sreg1 = this_reg;
3669                         mono_bblock_add_inst (cfg->cbb, this);
3670                 }
3671         }
3672
3673         if (vt_reg != -1) {
3674                 MonoInst *vtarg;
3675
3676                 if (cinfo->ret.storage == ArgValuetypeInReg) {
3677                         /*
3678                          * The valuetype is in EAX:EDX after the call, needs to be copied to
3679                          * the stack. Save the address here, so the call instruction can
3680                          * access it.
3681                          */
3682                         MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
3683                         vtarg->inst_destbasereg = X86_ESP;
3684                         vtarg->inst_offset = inst->stack_usage;
3685                         vtarg->sreg1 = vt_reg;
3686                         mono_bblock_add_inst (cfg->cbb, vtarg);
3687                 }
3688                 else if (cinfo->ret.storage == ArgInIReg) {
3689                         /* The return address is passed in a register */
3690                         MONO_INST_NEW (cfg, vtarg, OP_MOVE);
3691                         vtarg->sreg1 = vt_reg;
3692                         vtarg->dreg = mono_regstate_next_int (cfg->rs);
3693                         mono_bblock_add_inst (cfg->cbb, vtarg);
3694
3695                         mono_call_inst_add_outarg_reg (call, vtarg->dreg, cinfo->ret.reg, FALSE);
3696                 } else {
3697                         MonoInst *vtarg;
3698                         MONO_INST_NEW (cfg, vtarg, OP_OUTARG);
3699                         vtarg->type = STACK_MP;
3700                         vtarg->sreg1 = vt_reg;
3701                         mono_bblock_add_inst (cfg->cbb, vtarg);
3702                 }
3703         }
3704
3705         g_free (cinfo);
3706 }
3707
3708 MonoInst*
3709 mono_arch_get_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
3710 {
3711         MonoInst *ins = NULL;
3712
3713         if (cmethod->klass == mono_defaults.math_class) {
3714                 if (strcmp (cmethod->name, "Sin") == 0) {
3715                         MONO_INST_NEW (cfg, ins, OP_SIN);
3716                         ins->inst_i0 = args [0];
3717                 } else if (strcmp (cmethod->name, "Cos") == 0) {
3718                         MONO_INST_NEW (cfg, ins, OP_COS);
3719                         ins->inst_i0 = args [0];
3720                 } else if (strcmp (cmethod->name, "Tan") == 0) {
3721                         MONO_INST_NEW (cfg, ins, OP_TAN);
3722                         ins->inst_i0 = args [0];
3723                 } else if (strcmp (cmethod->name, "Atan") == 0) {
3724                         MONO_INST_NEW (cfg, ins, OP_ATAN);
3725                         ins->inst_i0 = args [0];
3726                 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
3727                         MONO_INST_NEW (cfg, ins, OP_SQRT);
3728                         ins->inst_i0 = args [0];
3729                 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
3730                         MONO_INST_NEW (cfg, ins, OP_ABS);
3731                         ins->inst_i0 = args [0];
3732                 }
3733 #if 0
3734                 /* OP_FREM is not IEEE compatible */
3735                 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
3736                         MONO_INST_NEW (cfg, ins, OP_FREM);
3737                         ins->inst_i0 = args [0];
3738                         ins->inst_i1 = args [1];
3739                 }
3740 #endif
3741         } else if (cmethod->klass == mono_defaults.thread_class &&
3742                            strcmp (cmethod->name, "MemoryBarrier") == 0) {
3743                 MONO_INST_NEW (cfg, ins, OP_MEMORY_BARRIER);
3744         } else if(cmethod->klass->image == mono_defaults.corlib &&
3745                            (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
3746                            (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
3747
3748                 if (strcmp (cmethod->name, "Increment") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3749                         MonoInst *ins_iconst;
3750
3751                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3752                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3753                         ins_iconst->inst_c0 = 1;
3754
3755                         ins->inst_i0 = args [0];
3756                         ins->inst_i1 = ins_iconst;
3757                 } else if (strcmp (cmethod->name, "Decrement") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3758                         MonoInst *ins_iconst;
3759
3760                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_NEW_I4);
3761                         MONO_INST_NEW (cfg, ins_iconst, OP_ICONST);
3762                         ins_iconst->inst_c0 = -1;
3763
3764                         ins->inst_i0 = args [0];
3765                         ins->inst_i1 = ins_iconst;
3766                 } else if (strcmp (cmethod->name, "Exchange") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3767                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_EXCHANGE_I4);
3768
3769                         ins->inst_i0 = args [0];
3770                         ins->inst_i1 = args [1];
3771                 } else if (strcmp (cmethod->name, "Add") == 0 && fsig->params [0]->type == MONO_TYPE_I4) {
3772                         MONO_INST_NEW (cfg, ins, OP_ATOMIC_ADD_I4);
3773
3774                         ins->inst_i0 = args [0];
3775                         ins->inst_i1 = args [1];
3776                 }
3777         }
3778
3779         return ins;
3780 }
3781
3782
gboolean
mono_arch_print_tree (MonoInst *tree, int arity)
{
	/* No arch-specific tree printing on x86; 0 tells the caller to use
	 * the generic printer. */
	return 0;
}
3788
3789 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
3790 {
3791         MonoInst* ins;
3792         
3793         if (appdomain_tls_offset == -1)
3794                 return NULL;
3795
3796         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3797         ins->inst_offset = appdomain_tls_offset;
3798         return ins;
3799 }
3800
3801 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
3802 {
3803         MonoInst* ins;
3804
3805         if (thread_tls_offset == -1)
3806                 return NULL;
3807
3808         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
3809         ins->inst_offset = thread_tls_offset;
3810         return ins;
3811 }
3812
3813 guint32
3814 mono_arch_get_patch_offset (guint8 *code)
3815 {
3816         if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
3817                 return 2;
3818         else if ((code [0] == 0xba))
3819                 return 1;
3820         else if ((code [0] == 0x68))
3821                 /* push IMM */
3822                 return 1;
3823         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
3824                 /* push <OFFSET>(<REG>) */
3825                 return 2;
3826         else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
3827                 /* call *<OFFSET>(<REG>) */
3828                 return 2;
3829         else if ((code [0] == 0xdd) || (code [0] == 0xd9))
3830                 /* fldl <ADDR> */
3831                 return 2;
3832         else if ((code [0] == 0x58) && (code [1] == 0x05))
3833                 /* pop %eax; add <OFFSET>, %eax */
3834                 return 2;
3835         else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
3836                 /* pop <REG>; add <OFFSET>, <REG> */
3837                 return 3;
3838         else {
3839                 g_assert_not_reached ();
3840                 return -1;
3841         }
3842 }
3843
gpointer*
mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
{
	/*
	 * Given the return address CODE of an indirect call and the saved
	 * register values REGS, decode the call instruction that precedes
	 * CODE and return the address of the vtable slot that was called
	 * through. Returns NULL for direct (0xe8) calls.
	 */
	guint8 reg = 0;
	gint32 disp = 0;

	/* go to the start of the call instruction
	 *
	 * address_byte = (m << 6) | (o << 3) | reg
	 * call opcode: 0xff address_byte displacement
	 * 0xff m=1,o=2 imm8
	 * 0xff m=2,o=2 imm32
	 */
	code -= 6;
	if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
		/* call *disp8(reg): 3-byte form at code+3 */
		reg = code [4] & 0x07;
		disp = (signed char)code [5];
	} else {
		if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
			/* call *disp32(reg): 6-byte form at code */
			reg = code [1] & 0x07;
			disp = *((gint32*)(code + 2));
		} else if ((code [1] == 0xe8)) {
			/* direct call rel32: no vtable slot involved */
			return NULL;
		} else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
			/*
			 * This is a interface call: should check the above code can't catch it earlier 
			 * 8b 40 30   mov    0x30(%eax),%eax
			 * ff 10      call   *(%eax)
			 */
			disp = 0;
			reg = code [5] & 0x07;
		}
		else
			return NULL;
	}

	/* Slot address = saved register value + decoded displacement. */
	return (gpointer*)(((gint32)(regs [reg])) + disp);
}
3882
gpointer* 
mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
{
	/*
	 * Given the return address CODE of a delegate invocation and the
	 * saved register values REGS, decode the 7-byte sequence
	 *   8b /r       mov reg, reg      (load delegate into %eax)
	 *   8b 40 disp8 mov 0xNN(%eax), %eax   (load method_ptr)
	 *   ff d0       call *%eax
	 * and return the address of the delegate's method_ptr field, or
	 * NULL when the code does not match this pattern (or the source
	 * register is EAX, whose saved value is already clobbered).
	 */
	guint8 reg = 0;
	gint32 disp = 0;

	code -= 7;
	if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 3) && (x86_modrm_reg (code [1]) == X86_EAX) && (code [2] == 0x8b) && (code [3] == 0x40) && (code [5] == 0xff) && (code [6] == 0xd0)) {
		reg = x86_modrm_rm (code [1]);
		disp = code [4];

		if (reg == X86_EAX)
			return NULL;
		else
			return (gpointer*)(((gint32)(regs [reg])) + disp);
	}

	return NULL;
}