2008-11-17 Zoltan Varga <vargaz@gmail.com>
[mono.git] / mono / mini / mini-ia64.c
1 /*
2  * mini-ia64.c: IA64 backend for the Mono code generator
3  *
4  * Authors:
5  *   Zoltan Varga (vargaz@gmail.com)
6  *
7  * (C) 2003 Ximian, Inc.
8  */
9 #include "mini.h"
10 #include <string.h>
11 #include <math.h>
12 #include <unistd.h>
13 #include <sys/mman.h>
14
15 #ifdef __INTEL_COMPILER
16 #include <ia64intrin.h>
17 #endif
18
19 #include <mono/metadata/appdomain.h>
20 #include <mono/metadata/debug-helpers.h>
21 #include <mono/metadata/threads.h>
22 #include <mono/metadata/profiler-private.h>
23 #include <mono/utils/mono-math.h>
24
25 #include "trace.h"
26 #include "mini-ia64.h"
27 #include "cpu-ia64.h"
28 #include "jit-icalls.h"
29
30 static gint appdomain_tls_offset = -1;
31 static gint thread_tls_offset = -1;
32
/*
 * ALIGN_TO:
 *
 *   Round VAL up to the next multiple of ALIGN, which must be a power of two.
 * The result has type guint64.
 * ALIGN is widened to guint64 before the mask is built: with an unsigned
 * 32 bit ALIGN, ~(align - 1) would otherwise be a 32 bit value which is zero
 * extended, clearing the upper half of the result.
 * Note that ALIGN is evaluated twice.
 */
#define ALIGN_TO(val,align) ((((guint64)val) + ((guint64)(align) - 1)) & ~((guint64)(align) - 1))
34
35 #define IS_IMM32(val) ((((guint64)val) >> 32) == 0)
36
/*
 * IA64 register usage:
 * - local registers are used for global register allocation
 * - r8..r11, r14..r30 are used for local register allocation
 * - r31 is a scratch register used within opcode implementations
 * - FIXME: Use out registers as well
 * - the first three locals are used for saving ar.pfs, b0, and sp
 * - compare instructions always set p6 and p7
 */
46
47 /*
48  * There are a lot of places where generated code is disassembled/patched.
49  * The automatic bundling of instructions done by the code generation macros
50  * could complicate things, so it is best to call 
51  * ia64_codegen_set_one_ins_per_bundle () at those places.
52  */
53
54 #define ARGS_OFFSET 16
55
56 #define GP_SCRATCH_REG 31
57 #define GP_SCRATCH_REG2 30
58 #define FP_SCRATCH_REG 32
59 #define FP_SCRATCH_REG2 33
60
61 #define LOOP_ALIGNMENT 8
62 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
63
/* Names of the IA64 general registers, indexed by register number */
static const char* gregs [] = {
	"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9",
	"r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19",
	"r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29",
	"r30", "r31", "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
	"r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", "r48", "r49",
	"r50", "r51", "r52", "r53", "r54", "r55", "r56", "r57", "r58", "r59",
	"r60", "r61", "r62", "r63", "r64", "r65", "r66", "r67", "r68", "r69",
	"r70", "r71", "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
	"r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87", "r88", "r89",
	"r90", "r91", "r92", "r93", "r94", "r95", "r96", "r97", "r98", "r99",
	"r100", "r101", "r102", "r103", "r104", "r105", "r106", "r107", "r108", "r109",
	"r110", "r111", "r112", "r113", "r114", "r115", "r116", "r117", "r118", "r119",
	"r120", "r121", "r122", "r123", "r124", "r125", "r126", "r127"
};

/*
 * mono_arch_regname:
 * @reg: a general register number
 *
 * Returns the name of the general register REG ("r0" .. "r127"), or "unknown"
 * if REG is not a valid index into the name table. The returned string is a
 * constant and must not be freed.
 */
const char*
mono_arch_regname (int reg)
{
	/* A negative number would index before the start of the table */
	if (reg >= 0 && reg < (int)(sizeof (gregs) / sizeof (gregs [0])))
		return gregs [reg];

	return "unknown";
}
88
/* Names of the IA64 floating point registers, indexed by register number */
static const char* fregs [] = {
	"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9",
	"f10", "f11", "f12", "f13", "f14", "f15", "f16", "f17", "f18", "f19",
	"f20", "f21", "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29",
	"f30", "f31", "f32", "f33", "f34", "f35", "f36", "f37", "f38", "f39",
	"f40", "f41", "f42", "f43", "f44", "f45", "f46", "f47", "f48", "f49",
	"f50", "f51", "f52", "f53", "f54", "f55", "f56", "f57", "f58", "f59",
	"f60", "f61", "f62", "f63", "f64", "f65", "f66", "f67", "f68", "f69",
	"f70", "f71", "f72", "f73", "f74", "f75", "f76", "f77", "f78", "f79",
	"f80", "f81", "f82", "f83", "f84", "f85", "f86", "f87", "f88", "f89",
	"f90", "f91", "f92", "f93", "f94", "f95", "f96", "f97", "f98", "f99",
	"f100", "f101", "f102", "f103", "f104", "f105", "f106", "f107", "f108", "f109",
	"f110", "f111", "f112", "f113", "f114", "f115", "f116", "f117", "f118", "f119",
	"f120", "f121", "f122", "f123", "f124", "f125", "f126", "f127"
};

/*
 * mono_arch_fregname:
 * @reg: a floating point register number
 *
 * Returns the name of the floating point register REG ("f0" .. "f127"), or
 * "unknown" if REG is not a valid index into the name table. The returned
 * string is a constant and must not be freed.
 */
const char*
mono_arch_fregname (int reg)
{
	/* A negative number would index before the start of the table */
	if (reg >= 0 && reg < (int)(sizeof (fregs) / sizeof (fregs [0])))
		return fregs [reg];

	return "unknown";
}
113
114 G_GNUC_UNUSED static void
115 break_count (void)
116 {
117 }
118
119 G_GNUC_UNUSED static gboolean
120 debug_count (void)
121 {
122         static int count = 0;
123         count ++;
124
125         if (count == atoi (getenv ("COUNT"))) {
126                 break_count ();
127         }
128
129         if (count > atoi (getenv ("COUNT"))) {
130                 return FALSE;
131         }
132
133         return TRUE;
134 }
135
136 static gboolean
137 debug_ins_sched (void)
138 {
139 #if 0
140         return debug_count ();
141 #else
142         return TRUE;
143 #endif
144 }
145
146 static gboolean
147 debug_omit_fp (void)
148 {
149 #if 0
150         return debug_count ();
151 #else
152         return TRUE;
153 #endif
154 }
155
156 static void 
157 ia64_patch (unsigned char* code, gpointer target);
158
/* Describes where an argument or a return value is located */
typedef enum {
	/* In a general register */
	ArgInIReg,
	/* In a floating point register, the value is a double */
	ArgInFloatReg,
	/* In a floating point register, the value is a single */
	ArgInFloatRegR4,
	/* In memory, on the stack */
	ArgOnStack,
	/* Valuetype return: the caller passes the address of the result in a general register */
	ArgValuetypeAddrInIReg,
	/* Valuetype passed in registers and/or stack slots, see the nregs/nslots/atype fields of ArgInfo */
	ArgAggregate,
	/* NOTE(review): the two HFA values are not assigned in the code visible here, HFAs use ArgAggregate + atype */
	ArgSingleHFA,
	ArgDoubleHFA,
	/* No value, used for void returns */
	ArgNone
} ArgStorage;
170
/* Kind of an ArgAggregate value. HFA means Homogeneous Floating Point Aggregate */
typedef enum {
	/* Not a HFA */
	AggregateNormal,
	/* Every field is a MONO_TYPE_R4 */
	AggregateSingleHFA,
	/* Every field is a MONO_TYPE_R8 */
	AggregateDoubleHFA
} AggregateType;
176
/* Location of a single argument or of the return value */
typedef struct {
	/* Value of the running stack size at the time the argument was processed */
	gint16 offset;
	/* Register number, its meaning depends on storage */
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgAggregate */
	/* nslots is the number of 8 byte slots of the value, nregs is how many of those are in registers */
	int nregs, nslots;
	AggregateType atype;
} ArgInfo;
186
/* Result of get_call_info (): describes how a signature is passed */
typedef struct {
	/* NOTE(review): not written by get_call_info () in the code visible here */
	int nargs;
	/* Number of bytes of stack used by the arguments */
	guint32 stack_usage;
	/* Final value of the general register counter */
	guint32 reg_usage;
	/* Final value of the floating point register counter */
	guint32 freg_usage;
	/* NOTE(review): not written by get_call_info () in the code visible here */
	gboolean need_stack_align;
	/* Location of the return value */
	ArgInfo ret;
	/* Location of the signature cookie of vararg calls */
	ArgInfo sig_cookie;
	/* One entry per argument including 'this', allocated together with the structure */
	ArgInfo args [1];
} CallInfo;
197
198 #define DEBUG(a) if (cfg->verbose_level > 1) a
199
200 #define NEW_ICONST(cfg,dest,val) do {   \
201                 (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));       \
202                 (dest)->opcode = OP_ICONST;     \
203                 (dest)->inst_c0 = (val);        \
204                 (dest)->type = STACK_I4;        \
205         } while (0)
206
207 #define PARAM_REGS 8
208
209 static void inline
210 add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
211 {
212     ainfo->offset = *stack_size;
213
214     if (*gr >= PARAM_REGS) {
215                 ainfo->storage = ArgOnStack;
216                 (*stack_size) += sizeof (gpointer);
217     }
218     else {
219                 ainfo->storage = ArgInIReg;
220                 ainfo->reg = *gr;
221                 *(gr) += 1;
222     }
223 }
224
225 #define FLOAT_PARAM_REGS 8
226
227 static void inline
228 add_float (guint32 *gr, guint32 *fr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
229 {
230     ainfo->offset = *stack_size;
231
232     if (*gr >= PARAM_REGS) {
233                 ainfo->storage = ArgOnStack;
234                 (*stack_size) += sizeof (gpointer);
235     }
236     else {
237                 ainfo->storage = is_double ? ArgInFloatReg : ArgInFloatRegR4;
238                 ainfo->reg = 8 + *fr;
239                 (*fr) += 1;
240                 (*gr) += 1;
241     }
242 }
243
/*
 * add_valuetype:
 *
 *   Compute how a valuetype (or TypedByRef) of type TYPE is passed as an
 * argument, or returned if IS_RETURN is TRUE, and store the result into AINFO.
 * GR, FR and STACK_SIZE are the running general register, floating point
 * register and stack size counters, they are updated as needed.
 *
 * - for non pinvoke signatures and for zero sized types, the value is passed
 *   in memory (ArgOnStack).
 * - otherwise the storage is ArgAggregate, and the value is either a HFA
 *   passed in floating point registers starting at f8, or a normal aggregate
 *   passed in general registers, with the remaining part on the stack for
 *   arguments.
 * Cases which are not supported end up in NOT_IMPLEMENTED.
 */
static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size, i;
	MonoClass *klass;
	MonoMarshalType *info;
	gboolean is_hfa = TRUE;
	guint32 hfa_type = 0;

	klass = mono_class_from_mono_type (type);
	if (type->type == MONO_TYPE_TYPEDBYREF)
		size = 3 * sizeof (gpointer);
	else if (sig->pinvoke)
		size = mono_type_native_stack_size (&klass->byval_arg, NULL);
	else
		size = mini_type_stack_size (gsctx, &klass->byval_arg, NULL);

	if (!sig->pinvoke || (size == 0)) {
		/* Always pass in memory */
		ainfo->offset = *stack_size;
		*stack_size += ALIGN_TO (size, 8);
		ainfo->storage = ArgOnStack;

		return;
	}

	/* Determine whether it is a HFA (Homogeneous Floating Point Aggregate) */
	/* That is, every field is a non-byref R4, or every field is a non-byref R8 */
	info = mono_marshal_load_type_info (klass);
	g_assert (info);
	for (i = 0; i < info->num_fields; ++i) {
		guint32 ftype = info->fields [i].field->type->type;
		if (!(info->fields [i].field->type->byref) &&
			((ftype == MONO_TYPE_R4) || (ftype == MONO_TYPE_R8))) {
			if (hfa_type == 0)
				hfa_type = ftype;
			else if (hfa_type != ftype)
				is_hfa = FALSE;
		}
		else
			is_hfa = FALSE;
	}
	/* No fields at all */
	if (hfa_type == 0)
		is_hfa = FALSE;

	ainfo->storage = ArgAggregate;
	ainfo->atype = AggregateNormal;

	if (is_hfa) {
		ainfo->atype = hfa_type == MONO_TYPE_R4 ? AggregateSingleHFA : AggregateDoubleHFA;
		if (is_return) {
			/* Returned in floating point registers starting with f8 */
			if (info->num_fields <= 8) {
				ainfo->reg = 8;
				ainfo->nregs = info->num_fields;
				ainfo->nslots = ainfo->nregs;
				return;
			}
			/* Fall through */
		}
		else {
			/* HFA arguments which don't fit into the remaining float registers are not supported */
			if ((*fr) + info->num_fields > 8)
				NOT_IMPLEMENTED;

			ainfo->reg = 8 + (*fr);
			ainfo->nregs = info->num_fields;
			ainfo->nslots = ainfo->nregs;
			(*fr) += info->num_fields;
			if (ainfo->atype == AggregateSingleHFA) {
				/*
				 * FIXME: Have to keep track of the parameter slot number, which is
				 * not the same as *gr.
				 */
				/* Two singles take up one 8 byte parameter slot */
				(*gr) += ALIGN_TO (info->num_fields, 2) / 2;
			} else {
				(*gr) += info->num_fields;
			}
			return;
		}
	}

	/* This also handles returning of TypedByRef used by some icalls */
	if (is_return) {
		/* Returned in up to 4 general registers starting with r8 */
		if (size <= 32) {
			ainfo->reg = IA64_R8;
			ainfo->nregs = (size + 7) / 8;
			ainfo->nslots = ainfo->nregs;
			return;
		}
		NOT_IMPLEMENTED;
	}

	ainfo->reg = (*gr);
	ainfo->offset = *stack_size;
	ainfo->nslots = (size + 7) / 8;

	if (((*gr) + ainfo->nslots) <= 8) {
		/* Fits entirely in registers */
		ainfo->nregs = ainfo->nslots;
		(*gr) += ainfo->nregs;
		return;
	}

	/* Use up the remaining registers, the rest of the slots go to the stack */
	ainfo->nregs = 8 - (*gr);
	(*gr) = 8;
	(*stack_size) += (ainfo->nslots - ainfo->nregs) * 8;
}
351
/*
 * get_call_info:
 *
 *  Obtain information about a call according to the calling convention.
 * For IA64, see the "Itanium Software Conventions and Runtime Architecture
 * Guide" document for more information.
 *
 * CFG may be NULL, in which case no generic sharing context is used.
 * The result is allocated from MP if it is not NULL. Otherwise it is allocated
 * using g_malloc0 (), mono_arch_get_argument_info () releases such a result
 * with g_free ().
 * IS_PINVOKE is not used by this function, only SIG->pinvoke is consulted.
 */
static CallInfo*
get_call_info (MonoCompile *cfg, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;
	MonoGenericSharingContext *gsctx = cfg ? cfg->generic_sharing_context : NULL;

	/* CallInfo already contains one ArgInfo, the N additional ones follow it */
	if (mp)
		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mono_type_get_underlying_type (sig->ret);
		ret_type = mini_get_basic_type_from_generic (gsctx, ret_type);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = IA64_R8;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = IA64_R8;
			break;
		case MONO_TYPE_R4:
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgInFloatReg;
			cinfo->ret.reg = 8;
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = IA64_R8;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
		case MONO_TYPE_TYPEDBYREF: {
			/* Private counters, so the return value doesn't use up argument registers or stack */
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			if (sig->ret->byref) {
				/* This seems to happen with ldfld wrappers */
				cinfo->ret.storage = ArgInIReg;
			} else {
				add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
				if (cinfo->ret.storage == ArgOnStack)
					/* The caller passes the address where the value is stored */
					add_general (&gr, &stack_size, &cinfo->ret);
				/* add_general () has just assigned a register to the address */
				if (cinfo->ret.storage == ArgInIReg)
					cinfo->ret.storage = ArgValuetypeAddrInIReg;
			}
			break;
		}
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	/* Vararg call without any argument: only the signature cookie is passed */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		/* Byref arguments are passed as general values whatever their type is */
		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mono_type_get_underlying_type (sig->params [i]);
		ptype = mini_get_basic_type_from_generic (gsctx, ptype);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
		case MONO_TYPE_TYPEDBYREF:
			/* FIXME: */
			/* We always pass valuetypes on the stack */
			add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&gr, &fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&gr, &fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_assert_not_reached ();
		}
	}

	/* Vararg call where the sentinel is after the last argument, so the loop above did not emit the cookie */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
545
546 /*
547  * mono_arch_get_argument_info:
548  * @csig:  a method signature
549  * @param_count: the number of parameters to consider
550  * @arg_info: an array to store the result infos
551  *
552  * Gathers information on parameters such as size, alignment and
553  * padding. arg_info should be large enought to hold param_count + 1 entries. 
554  *
555  * Returns the size of the argument area on the stack.
556  */
557 int
558 mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
559 {
560         int k;
561         CallInfo *cinfo = get_call_info (NULL, NULL, csig, FALSE);
562         guint32 args_size = cinfo->stack_usage;
563
564         /* The arguments are saved to a stack area in mono_arch_instrument_prolog */
565         if (csig->hasthis) {
566                 arg_info [0].offset = 0;
567         }
568
569         for (k = 0; k < param_count; k++) {
570                 arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
571                 /* FIXME: */
572                 arg_info [k + 1].size = 0;
573         }
574
575         g_free (cinfo);
576
577         return args_size;
578 }
579
/*
 * Initialize the cpu to execute managed code.
 * This implementation is empty, no initialization is done here.
 */
void
mono_arch_cpu_init (void)
{
}
587
/*
 * Initialize architecture specific code.
 * This implementation is empty, no initialization is done here.
 */
void
mono_arch_init (void)
{
}
595
/*
 * Cleanup architecture specific code.
 * This implementation is empty, there is nothing to clean up here.
 */
void
mono_arch_cleanup (void)
{
}
603
604 /*
605  * This function returns the optimizations supported on this cpu.
606  */
607 guint32
608 mono_arch_cpu_optimizazions (guint32 *exclude_mask)
609 {
610         *exclude_mask = 0;
611
612         return 0;
613 }
614
615 GList *
616 mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
617 {
618         GList *vars = NULL;
619         int i;
620         MonoMethodSignature *sig;
621         MonoMethodHeader *header;
622         CallInfo *cinfo;
623
624         header = mono_method_get_header (cfg->method);
625
626         sig = mono_method_signature (cfg->method);
627
628         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
629
630         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
631                 MonoInst *ins = cfg->args [i];
632
633                 ArgInfo *ainfo = &cinfo->args [i];
634
635                 if (ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT))
636                         continue;
637
638                 if (ainfo->storage == ArgInIReg) {
639                         /* The input registers are non-volatile */
640                         ins->opcode = OP_REGVAR;
641                         ins->dreg = 32 + ainfo->reg;
642                 }
643         }
644
645         for (i = 0; i < cfg->num_varinfo; i++) {
646                 MonoInst *ins = cfg->varinfo [i];
647                 MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
648
649                 /* unused vars */
650                 if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
651                         continue;
652
653                 if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) || 
654                     (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
655                         continue;
656
657                 if (mono_is_regsize_var (ins->inst_vtype)) {
658                         g_assert (MONO_VARINFO (cfg, i)->reg == -1);
659                         g_assert (i == vmv->idx);
660                         vars = g_list_prepend (vars, vmv);
661                 }
662         }
663
664         vars = mono_varlist_sort (cfg, vars, 0);
665
666         return vars;
667 }
668
669 static void
670 mono_ia64_alloc_stacked_registers (MonoCompile *cfg)
671 {
672         CallInfo *cinfo;
673         guint32 reserved_regs;
674         MonoMethodHeader *header;
675
676         if (cfg->arch.reg_local0 > 0)
677                 /* Already done */
678                 return;
679
680         cinfo = get_call_info (cfg, cfg->mempool, mono_method_signature (cfg->method), FALSE);
681
682         header = mono_method_get_header (cfg->method);
683         
684         /* Some registers are reserved for use by the prolog/epilog */
685         reserved_regs = header->num_clauses ? 4 : 3;
686
687         if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
688                 (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)) {
689                 /* One registers is needed by instrument_epilog to save the return value */
690                 reserved_regs ++;
691                 if (cinfo->reg_usage < 2)
692                         /* Number of arguments passed to function call in instrument_prolog */
693                         cinfo->reg_usage = 2;
694         }
695
696         cfg->arch.reg_in0 = 32;
697         cfg->arch.reg_local0 = cfg->arch.reg_in0 + cinfo->reg_usage + reserved_regs;
698         cfg->arch.reg_out0 = cfg->arch.reg_local0 + 16;
699
700         cfg->arch.reg_saved_ar_pfs = cfg->arch.reg_local0 - 1;
701         cfg->arch.reg_saved_b0 = cfg->arch.reg_local0 - 2;
702         cfg->arch.reg_fp = cfg->arch.reg_local0 - 3;
703
704         /* 
705          * Frames without handlers save sp to fp, frames with handlers save it into
706          * a dedicated register.
707          */
708         if (header->num_clauses)
709                 cfg->arch.reg_saved_sp = cfg->arch.reg_local0 - 4;
710         else
711                 cfg->arch.reg_saved_sp = cfg->arch.reg_fp;
712
713         if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
714                 (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)) {
715                 cfg->arch.reg_saved_return_val = cfg->arch.reg_local0 - reserved_regs;
716         }
717
718         /* 
719          * Need to allocate at least 2 out register for use by OP_THROW / the system
720          * exception throwing code.
721          */
722         cfg->arch.n_out_regs = MAX (cfg->arch.n_out_regs, 2);
723 }
724
725 GList *
726 mono_arch_get_global_int_regs (MonoCompile *cfg)
727 {
728         GList *regs = NULL;
729         int i;
730
731         mono_ia64_alloc_stacked_registers (cfg);
732
733         for (i = cfg->arch.reg_local0; i < cfg->arch.reg_out0; ++i) {
734                 /* FIXME: regmask */
735                 g_assert (i < 64);
736                 regs = g_list_prepend (regs, (gpointer)(gssize)(i));
737         }
738
739         return regs;
740 }
741
/*
 * mono_arch_regalloc_cost:
 *
 *  Return the cost, in number of memory references, of the action of 
 * allocating the variable VMV into a register during global register
 * allocation.
 * The current implementation ignores its arguments and always returns 0.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	/* FIXME: Increase costs linearly to avoid using all local registers */

	return 0;
}
756  
757 void
758 mono_arch_allocate_vars (MonoCompile *cfg)
759 {
760         MonoMethodSignature *sig;
761         MonoMethodHeader *header;
762         MonoInst *inst;
763         int i, offset;
764         guint32 locals_stack_size, locals_stack_align;
765         gint32 *offsets;
766         CallInfo *cinfo;
767
768         header = mono_method_get_header (cfg->method);
769
770         sig = mono_method_signature (cfg->method);
771
772         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
773
774         /*
775          * Determine whenever the frame pointer can be eliminated.
776          * FIXME: Remove some of the restrictions.
777          */
778         cfg->arch.omit_fp = TRUE;
779
780         if (!debug_omit_fp ())
781                 cfg->arch.omit_fp = FALSE;
782
783         if (cfg->flags & MONO_CFG_HAS_ALLOCA)
784                 cfg->arch.omit_fp = FALSE;
785         if (header->num_clauses)
786                 cfg->arch.omit_fp = FALSE;
787         if (cfg->param_area)
788                 cfg->arch.omit_fp = FALSE;
789         if ((sig->ret->type != MONO_TYPE_VOID) && (cinfo->ret.storage == ArgAggregate))
790                 cfg->arch.omit_fp = FALSE;
791         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
792                 cfg->arch.omit_fp = FALSE;
793         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
794                 ArgInfo *ainfo = &cinfo->args [i];
795
796                 if (ainfo->storage == ArgOnStack) {
797                         /* 
798                          * The stack offset can only be determined when the frame
799                          * size is known.
800                          */
801                         cfg->arch.omit_fp = FALSE;
802                 }
803         }
804
805         mono_ia64_alloc_stacked_registers (cfg);
806
807         /*
808          * We use the ABI calling conventions for managed code as well.
809          * Exception: valuetypes are never passed or returned in registers.
810          */
811
812         if (cfg->arch.omit_fp) {
813                 cfg->flags |= MONO_CFG_HAS_SPILLUP;
814                 cfg->frame_reg = IA64_SP;
815                 offset = ARGS_OFFSET;
816         }
817         else {
818                 /* Locals are allocated backwards from %fp */
819                 cfg->frame_reg = cfg->arch.reg_fp;
820                 offset = 0;
821         }
822
823         if (cfg->method->save_lmf) {
824                 /* No LMF on IA64 */
825         }
826
827         if (sig->ret->type != MONO_TYPE_VOID) {
828                 switch (cinfo->ret.storage) {
829                 case ArgInIReg:
830                         cfg->ret->opcode = OP_REGVAR;
831                         cfg->ret->inst_c0 = cinfo->ret.reg;
832                         break;
833                 case ArgInFloatReg:
834                         cfg->ret->opcode = OP_REGVAR;
835                         cfg->ret->inst_c0 = cinfo->ret.reg;
836                         break;
837                 case ArgValuetypeAddrInIReg:
838                         cfg->vret_addr->opcode = OP_REGVAR;
839                         cfg->vret_addr->dreg = cfg->arch.reg_in0 + cinfo->ret.reg;
840                         break;
841                 case ArgAggregate:
842                         /* Allocate a local to hold the result, the epilog will copy it to the correct place */
843                         if (cfg->arch.omit_fp)
844                                 g_assert_not_reached ();
845                         offset = ALIGN_TO (offset, 8);
846                         offset += cinfo->ret.nslots * 8;
847                         cfg->ret->opcode = OP_REGOFFSET;
848                         cfg->ret->inst_basereg = cfg->frame_reg;
849                         cfg->ret->inst_offset = - offset;
850                         break;
851                 default:
852                         g_assert_not_reached ();
853                 }
854                 cfg->ret->dreg = cfg->ret->inst_c0;
855         }
856
857         /* Allocate locals */
858         offsets = mono_allocate_stack_slots_full (cfg, cfg->arch.omit_fp ? FALSE : TRUE, &locals_stack_size, &locals_stack_align);
859         if (locals_stack_align) {
860                 offset = ALIGN_TO (offset, locals_stack_align);
861         }
862         for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
863                 if (offsets [i] != -1) {
864                         MonoInst *inst = cfg->varinfo [i];
865                         inst->opcode = OP_REGOFFSET;
866                         inst->inst_basereg = cfg->frame_reg;
867                         if (cfg->arch.omit_fp)
868                                 inst->inst_offset = (offset + offsets [i]);
869                         else
870                                 inst->inst_offset = - (offset + offsets [i]);
871                         // printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
872                 }
873         }
874         offset += locals_stack_size;
875
876         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
877                 if (cfg->arch.omit_fp)
878                         g_assert_not_reached ();
879                 g_assert (cinfo->sig_cookie.storage == ArgOnStack);
880                 cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
881         }
882
883         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
884                 inst = cfg->args [i];
885                 if (inst->opcode != OP_REGVAR) {
886                         ArgInfo *ainfo = &cinfo->args [i];
887                         gboolean inreg = TRUE;
888                         MonoType *arg_type;
889
890                         if (sig->hasthis && (i == 0))
891                                 arg_type = &mono_defaults.object_class->byval_arg;
892                         else
893                                 arg_type = sig->params [i - sig->hasthis];
894
895                         /* FIXME: VOLATILE is only set if the liveness pass runs */
896                         if (inst->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
897                                 inreg = FALSE;
898
899                         inst->opcode = OP_REGOFFSET;
900
901                         switch (ainfo->storage) {
902                         case ArgInIReg:
903                                 inst->opcode = OP_REGVAR;
904                                 inst->dreg = cfg->arch.reg_in0 + ainfo->reg;
905                                 break;
906                         case ArgInFloatReg:
907                         case ArgInFloatRegR4:
908                                 /* 
909                                  * Since float regs are volatile, we save the arguments to
910                                  * the stack in the prolog.
911                                  */
912                                 inreg = FALSE;
913                                 break;
914                         case ArgOnStack:
915                                 if (cfg->arch.omit_fp)
916                                         g_assert_not_reached ();
917                                 inst->opcode = OP_REGOFFSET;
918                                 inst->inst_basereg = cfg->frame_reg;
919                                 inst->inst_offset = ARGS_OFFSET + ainfo->offset;
920                                 break;
921                         case ArgAggregate:
922                                 inreg = FALSE;
923                                 break;
924                         default:
925                                 NOT_IMPLEMENTED;
926                         }
927
928                         if (!inreg && (ainfo->storage != ArgOnStack)) {
929                                 guint32 size = 0;
930
931                                 inst->opcode = OP_REGOFFSET;
932                                 inst->inst_basereg = cfg->frame_reg;
933                                 /* These arguments are saved to the stack in the prolog */
934                                 switch (ainfo->storage) {
935                                 case ArgAggregate:
936                                         if (ainfo->atype == AggregateSingleHFA)
937                                                 size = ainfo->nslots * 4;
938                                         else
939                                                 size = ainfo->nslots * 8;
940                                         break;
941                                 default:
942                                         size = sizeof (gpointer);
943                                         break;
944                                 }
945
946                                 offset = ALIGN_TO (offset, sizeof (gpointer));
947
948                                 if (cfg->arch.omit_fp) {
949                                         inst->inst_offset = offset;
950                                         offset += size;
951                                 } else {
952                                         offset += size;
953                                         inst->inst_offset = - offset;
954                                 }
955                         }
956                 }
957         }
958
959         /* 
960          * FIXME: This doesn't work because some variables are allocated during local
961          * regalloc.
962          */
963         /*
964         if (cfg->arch.omit_fp && offset == 16)
965                 offset = 0;
966         */
967
968         cfg->stack_offset = offset;
969 }
970
971 void
972 mono_arch_create_vars (MonoCompile *cfg)
973 {
974         MonoMethodSignature *sig;
975         CallInfo *cinfo;
976
977         sig = mono_method_signature (cfg->method);
978
979         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
980
981         if (cinfo->ret.storage == ArgAggregate)
982                 cfg->ret_var_is_local = TRUE;
983         if (cinfo->ret.storage == ArgValuetypeAddrInIReg) {
984                 cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
985                 if (G_UNLIKELY (cfg->verbose_level > 1)) {
986                         printf ("vret_addr = ");
987                         mono_print_ins (cfg->vret_addr);
988                 }
989         }
990 }
991
992 static void
993 add_outarg_reg (MonoCompile *cfg, MonoCallInst *call, ArgStorage storage, int reg, MonoInst *tree)
994 {
995         MonoInst *arg;
996
997         MONO_INST_NEW (cfg, arg, OP_NOP);
998         arg->sreg1 = tree->dreg;
999
1000         switch (storage) {
1001         case ArgInIReg:
1002                 arg->opcode = OP_MOVE;
1003                 arg->dreg = mono_alloc_ireg (cfg);
1004
1005                 mono_call_inst_add_outarg_reg (cfg, call, arg->dreg, reg, FALSE);
1006                 break;
1007         case ArgInFloatReg:
1008                 arg->opcode = OP_FMOVE;
1009                 arg->dreg = mono_alloc_freg (cfg);
1010
1011                 mono_call_inst_add_outarg_reg (cfg, call, arg->dreg, reg, TRUE);
1012                 break;
1013         case ArgInFloatRegR4:
1014                 arg->opcode = OP_FCONV_TO_R4;
1015                 arg->dreg = mono_alloc_freg (cfg);
1016
1017                 mono_call_inst_add_outarg_reg (cfg, call, arg->dreg, reg, TRUE);
1018                 break;
1019         default:
1020                 g_assert_not_reached ();
1021         }
1022
1023         MONO_ADD_INS (cfg->cbb, arg);
1024 }
1025
1026 static void
1027 emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
1028 {
1029         MonoMethodSignature *tmp_sig;
1030
1031         /* Emit the signature cookie just before the implicit arguments */
1032         MonoInst *sig_arg;
1033         /* FIXME: Add support for signature tokens to AOT */
1034         cfg->disable_aot = TRUE;
1035
1036         g_assert (cinfo->sig_cookie.storage == ArgOnStack);
1037
1038         /*
1039          * mono_ArgIterator_Setup assumes the signature cookie is 
1040          * passed first and all the arguments which were before it are
1041          * passed on the stack after the signature. So compensate by 
1042          * passing a different signature.
1043          */
1044         tmp_sig = mono_metadata_signature_dup (call->signature);
1045         tmp_sig->param_count -= call->signature->sentinelpos;
1046         tmp_sig->sentinelpos = 0;
1047         memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
1048
1049         MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
1050         sig_arg->dreg = mono_alloc_ireg (cfg);
1051         sig_arg->inst_p0 = tmp_sig;
1052         MONO_ADD_INS (cfg->cbb, sig_arg);
1053
1054         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI8_MEMBASE_REG, IA64_SP, 16 + cinfo->sig_cookie.offset, sig_arg->dreg);
1055 }
1056
1057 void
1058 mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
1059 {
1060         MonoInst *in;
1061         MonoMethodSignature *sig;
1062         int i, n, stack_size;
1063         CallInfo *cinfo;
1064         ArgInfo *ainfo;
1065
1066         stack_size = 0;
1067
1068         mono_ia64_alloc_stacked_registers (cfg);
1069
1070         sig = call->signature;
1071         n = sig->param_count + sig->hasthis;
1072
1073         cinfo = get_call_info (cfg, cfg->mempool, sig, sig->pinvoke);
1074
1075         if (cinfo->ret.storage == ArgAggregate) {
1076                 MonoInst *vtarg;
1077                 MonoInst *local;
1078
1079                 /* 
1080                  * The valuetype is in registers after the call, need to be copied 
1081                  * to the stack. Save the address to a local here, so the call 
1082                  * instruction can access it.
1083                  */
1084                 local = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
1085                 local->flags |= MONO_INST_VOLATILE;
1086                 cfg->arch.ret_var_addr_local = local;
1087
1088                 MONO_INST_NEW (cfg, vtarg, OP_MOVE);
1089                 vtarg->sreg1 = call->vret_var->dreg;
1090                 vtarg->dreg = local->dreg;
1091                 MONO_ADD_INS (cfg->cbb, vtarg);
1092         }
1093
1094         if (cinfo->ret.storage == ArgValuetypeAddrInIReg) {
1095                 add_outarg_reg (cfg, call, ArgInIReg, cfg->arch.reg_out0 + cinfo->ret.reg, call->vret_var);
1096         }
1097
1098         for (i = 0; i < n; ++i) {
1099                 MonoType *arg_type;
1100
1101                 ainfo = cinfo->args + i;
1102
1103                 if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
1104                         /* Emit the signature cookie just before the implicit arguments */
1105                         emit_sig_cookie (cfg, call, cinfo);
1106                 }
1107
1108                 in = call->args [i];
1109
1110                 if (sig->hasthis && (i == 0))
1111                         arg_type = &mono_defaults.object_class->byval_arg;
1112                 else
1113                         arg_type = sig->params [i - sig->hasthis];
1114
1115                 if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(arg_type))) {
1116                         guint32 align;
1117                         guint32 size;
1118
1119                         if (arg_type->type == MONO_TYPE_TYPEDBYREF) {
1120                                 size = sizeof (MonoTypedRef);
1121                                 align = sizeof (gpointer);
1122                         }
1123                         else if (sig->pinvoke)
1124                                 size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
1125                         else {
1126                                 /* 
1127                                  * Other backends use mono_type_stack_size (), but that
1128                                  * aligns the size to 8, which is larger than the size of
1129                                  * the source, leading to reads of invalid memory if the
1130                                  * source is at the end of address space.
1131                                  */
1132                                 size = mono_class_value_size (in->klass, &align);
1133                         }
1134
1135                         if (size > 0) {
1136                                 MonoInst *arg;
1137
1138                                 MONO_INST_NEW (cfg, arg, OP_OUTARG_VT);
1139                                 arg->sreg1 = in->dreg;
1140                                 arg->klass = in->klass;
1141                                 arg->backend.size = size;
1142                                 arg->inst_p0 = call;
1143                                 arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
1144                                 memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo));
1145
1146                                 MONO_ADD_INS (cfg->cbb, arg);
1147                         }
1148                 }
1149                 else {
1150                         switch (ainfo->storage) {
1151                         case ArgInIReg:
1152                                 add_outarg_reg (cfg, call, ainfo->storage, cfg->arch.reg_out0 + ainfo->reg, in);
1153                                 break;
1154                         case ArgInFloatReg:
1155                         case ArgInFloatRegR4:
1156                                 add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
1157                                 break;
1158                         case ArgOnStack:
1159                                 if (arg_type->type == MONO_TYPE_R4 && !arg_type->byref)
1160                                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, IA64_SP, 16 + ainfo->offset, in->dreg);
1161                                 else if (arg_type->type == MONO_TYPE_R8 && !arg_type->byref)
1162                                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, IA64_SP, 16 + ainfo->offset, in->dreg);
1163                                 else
1164                                         MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI8_MEMBASE_REG, IA64_SP, 16 + ainfo->offset, in->dreg);
1165                                 break;
1166                         default:
1167                                 g_assert_not_reached ();
1168                         }
1169                 }
1170         }
1171
1172         /* Handle the case where there are no implicit arguments */
1173         if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos)) {
1174                 emit_sig_cookie (cfg, call, cinfo);
1175         }
1176
1177         call->stack_usage = cinfo->stack_usage;
1178         cfg->arch.n_out_regs = MAX (cfg->arch.n_out_regs, cinfo->reg_usage);
1179 }
1180
1181 void
1182 mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
1183 {
1184         MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
1185         ArgInfo *ainfo = (ArgInfo*)ins->inst_p1;
1186         int size = ins->backend.size;
1187
1188         if (ainfo->storage == ArgAggregate) {
1189                 MonoInst *load, *store;
1190                 int i, slot;
1191
1192                 /* 
1193                  * Part of the structure is passed in registers.
1194                  */
1195                 for (i = 0; i < ainfo->nregs; ++i) {
1196                         slot = ainfo->reg + i;
1197                         
1198                         if (ainfo->atype == AggregateSingleHFA) {
1199                                 MONO_INST_NEW (cfg, load, OP_LOADR4_MEMBASE);
1200                                 load->inst_basereg = src->dreg;
1201                                 load->inst_offset = i * 4;
1202                                 load->dreg = mono_alloc_freg (cfg);
1203
1204                                 mono_call_inst_add_outarg_reg (cfg, call, load->dreg, ainfo->reg + i, TRUE);
1205                         } else if (ainfo->atype == AggregateDoubleHFA) {
1206                                 MONO_INST_NEW (cfg, load, OP_LOADR8_MEMBASE);
1207                                 load->inst_basereg = src->dreg;
1208                                 load->inst_offset = i * 8;
1209                                 load->dreg = mono_alloc_freg (cfg);
1210
1211                                 mono_call_inst_add_outarg_reg (cfg, call, load->dreg, ainfo->reg + i, TRUE);
1212                         } else {
1213                                 MONO_INST_NEW (cfg, load, OP_LOADI8_MEMBASE);
1214                                 load->inst_basereg = src->dreg;
1215                                 load->inst_offset = i * 8;
1216                                 load->dreg = mono_alloc_ireg (cfg);
1217
1218                                 mono_call_inst_add_outarg_reg (cfg, call, load->dreg, cfg->arch.reg_out0 + ainfo->reg + i, FALSE);
1219                         }
1220                         MONO_ADD_INS (cfg->cbb, load);
1221                 }
1222
1223                 /* 
1224                  * Part of the structure is passed on the stack.
1225                  */
1226                 for (i = ainfo->nregs; i < ainfo->nslots; ++i) {
1227                         slot = ainfo->reg + i;
1228
1229                         MONO_INST_NEW (cfg, load, OP_LOADI8_MEMBASE);
1230                         load->inst_basereg = src->dreg;
1231                         load->inst_offset = i * sizeof (gpointer);
1232                         load->dreg = mono_alloc_preg (cfg);
1233                         MONO_ADD_INS (cfg->cbb, load);
1234
1235                         MONO_INST_NEW (cfg, store, OP_STOREI8_MEMBASE_REG);
1236                         store->sreg1 = load->dreg;
1237                         store->inst_destbasereg = IA64_SP;
1238                         store->inst_offset = 16 + ainfo->offset + (slot - 8) * 8;
1239                         MONO_ADD_INS (cfg->cbb, store);
1240                 }
1241         } else {
1242                 mini_emit_memcpy (cfg, IA64_SP, 16 + ainfo->offset, src->dreg, 0, size, 4);
1243         }
1244 }
1245
1246 void
1247 mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
1248 {
1249         CallInfo *cinfo = get_call_info (cfg, cfg->mempool, mono_method_signature (method), FALSE);
1250
1251         switch (cinfo->ret.storage) {
1252         case ArgInIReg:
1253                 MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
1254                 break;
1255         case ArgInFloatReg:
1256                 MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
1257                 break;
1258         default:
1259                 g_assert_not_reached ();
1260         }
1261 }
1262
/*
 * mono_arch_peephole_pass_1:
 *
 *   First peephole pass over BB. This backend performs no optimizations in
 * this pass, so the function is intentionally empty.
 */
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
}
1267
1268 void
1269 mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
1270 {
1271         MonoInst *ins, *n, *last_ins = NULL;
1272         ins = bb->code;
1273
1274         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
1275                 switch (ins->opcode) {
1276                 case OP_MOVE:
1277                 case OP_FMOVE:
1278                         /*
1279                          * Removes:
1280                          *
1281                          * OP_MOVE reg, reg 
1282                          */
1283                         if (ins->dreg == ins->sreg1) {
1284                                 MONO_DELETE_INS (bb, ins);
1285                                 continue;
1286                         }
1287                         /* 
1288                          * Removes:
1289                          *
1290                          * OP_MOVE sreg, dreg 
1291                          * OP_MOVE dreg, sreg
1292                          */
1293                         if (last_ins && last_ins->opcode == OP_MOVE &&
1294                             ins->sreg1 == last_ins->dreg &&
1295                             ins->dreg == last_ins->sreg1) {
1296                                 MONO_DELETE_INS (bb, ins);
1297                                 continue;
1298                         }
1299                         break;
1300                 case OP_MUL_IMM: 
1301                 case OP_IMUL_IMM: 
1302                         /* remove unnecessary multiplication with 1 */
1303                         if (ins->inst_imm == 1) {
1304                                 if (ins->dreg != ins->sreg1) {
1305                                         ins->opcode = OP_MOVE;
1306                                 } else {
1307                                         MONO_DELETE_INS (bb, ins);
1308                                         continue;
1309                                 }
1310                         }
1311                         break;
1312                 }
1313
1314                 last_ins = ins;
1315                 ins = ins->next;
1316         }
1317         bb->last_ins = last_ins;
1318 }
1319
/*
 * cond_to_ia64_cmp:
 *
 *   Lookup table of the register-register compare opcodes, used by
 * opcode_to_ia64_cmp (). The row is selected by the value returned by
 * mono_opcode_to_cond () and the column by the value returned by
 * mono_opcode_to_type (). The columns hold the OP_IA64_CMP_*, OP_IA64_CMP4_*
 * and OP_IA64_FCMP_* variant of the condition, in this order.
 * NOTE(review): the row order (EQ, NE, LE, GE, LT, GT, followed by the _UN
 * variants) has to match the numbering of the condition enumeration, which is
 * defined outside of this file - confirm before reordering.
 */
int cond_to_ia64_cmp [][3] = {
        {OP_IA64_CMP_EQ, OP_IA64_CMP4_EQ, OP_IA64_FCMP_EQ},
        {OP_IA64_CMP_NE, OP_IA64_CMP4_NE, OP_IA64_FCMP_NE},
        {OP_IA64_CMP_LE, OP_IA64_CMP4_LE, OP_IA64_FCMP_LE},
        {OP_IA64_CMP_GE, OP_IA64_CMP4_GE, OP_IA64_FCMP_GE},
        {OP_IA64_CMP_LT, OP_IA64_CMP4_LT, OP_IA64_FCMP_LT},
        {OP_IA64_CMP_GT, OP_IA64_CMP4_GT, OP_IA64_FCMP_GT},
        {OP_IA64_CMP_LE_UN, OP_IA64_CMP4_LE_UN, OP_IA64_FCMP_LE_UN},
        {OP_IA64_CMP_GE_UN, OP_IA64_CMP4_GE_UN, OP_IA64_FCMP_GE_UN},
        {OP_IA64_CMP_LT_UN, OP_IA64_CMP4_LT_UN, OP_IA64_FCMP_LT_UN},
        {OP_IA64_CMP_GT_UN, OP_IA64_CMP4_GT_UN, OP_IA64_FCMP_GT_UN}
};
1332
1333 static int
1334 opcode_to_ia64_cmp (int opcode, int cmp_opcode)
1335 {
1336         return cond_to_ia64_cmp [mono_opcode_to_cond (opcode)][mono_opcode_to_type (opcode, cmp_opcode)];
1337 }
1338
/*
 * cond_to_ia64_cmp_imm:
 *
 *   Lookup table of the compare-with-immediate opcodes, used by
 * opcode_to_ia64_cmp_imm () and indexed like cond_to_ia64_cmp. Compared to
 * that table every ordered condition is stored reversed (LE <-> GE,
 * LT <-> GT), EQ and NE stay as they are. The third column is 0 in every row,
 * so no immediate opcode is provided for it.
 * NOTE(review): presumably the reversal is needed because the immediate ends
 * up as the first operand of the machine compare - confirm against the code
 * emitting the OP_IA64_CMP*_IMM opcodes.
 */
int cond_to_ia64_cmp_imm [][3] = {
        {OP_IA64_CMP_EQ_IMM, OP_IA64_CMP4_EQ_IMM, 0},
        {OP_IA64_CMP_NE_IMM, OP_IA64_CMP4_NE_IMM, 0},
        {OP_IA64_CMP_GE_IMM, OP_IA64_CMP4_GE_IMM, 0},
        {OP_IA64_CMP_LE_IMM, OP_IA64_CMP4_LE_IMM, 0},
        {OP_IA64_CMP_GT_IMM, OP_IA64_CMP4_GT_IMM, 0},
        {OP_IA64_CMP_LT_IMM, OP_IA64_CMP4_LT_IMM, 0},
        {OP_IA64_CMP_GE_UN_IMM, OP_IA64_CMP4_GE_UN_IMM, 0},
        {OP_IA64_CMP_LE_UN_IMM, OP_IA64_CMP4_LE_UN_IMM, 0},
        {OP_IA64_CMP_GT_UN_IMM, OP_IA64_CMP4_GT_UN_IMM, 0},
        {OP_IA64_CMP_LT_UN_IMM, OP_IA64_CMP4_LT_UN_IMM, 0},
};
1351
1352 static int
1353 opcode_to_ia64_cmp_imm (int opcode, int cmp_opcode)
1354 {
1355         /* The condition needs to be reversed */
1356         return cond_to_ia64_cmp_imm [mono_opcode_to_cond (opcode)][mono_opcode_to_type (opcode, cmp_opcode)];
1357 }
1358
/*
 * NEW_INS:
 *
 *   Allocates a MonoInst with mono_mempool_alloc0 () from the mempool of CFG,
 * sets its opcode to OP, stores it into DEST and passes it to
 * mono_bblock_insert_after_ins () together with 'bb' and 'last_ins'.
 * 'bb' and 'last_ins' are not macro parameters: they are picked up from the
 * scope where the macro is expanded, and 'last_ins' is set to the new
 * instruction afterwards.
 */
#define NEW_INS(cfg,dest,op) do {       \
                (dest) = mono_mempool_alloc0 ((cfg)->mempool, sizeof (MonoInst));       \
                (dest)->opcode = (op);  \
        mono_bblock_insert_after_ins (bb, last_ins, (dest)); \
        last_ins = (dest); \
        } while (0)
1365
1366 /*
1367  * mono_arch_lowering_pass:
1368  *
1369  *  Converts complex opcodes into simpler ones so that each IR instruction
1370  * corresponds to one machine instruction.
1371  */
1372 void
1373 mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
1374 {
1375         MonoInst *ins, *n, *next, *temp, *temp2, *temp3, *last_ins = NULL;
1376         ins = bb->code;
1377
1378         if (bb->max_vreg > cfg->rs->next_vreg)
1379                 cfg->rs->next_vreg = bb->max_vreg;
1380
1381         MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
1382                 switch (ins->opcode) {
1383                 case OP_STOREI1_MEMBASE_IMM:
1384                 case OP_STOREI2_MEMBASE_IMM:
1385                 case OP_STOREI4_MEMBASE_IMM:
1386                 case OP_STOREI8_MEMBASE_IMM:
1387                 case OP_STORE_MEMBASE_IMM:
1388                         /* There are no store_membase instructions on ia64 */
1389                         if (ins->inst_offset == 0) {
1390                                 temp2 = NULL;
1391                         } else if (ia64_is_imm14 (ins->inst_offset)) {
1392                                 NEW_INS (cfg, temp2, OP_ADD_IMM);
1393                                 temp2->sreg1 = ins->inst_destbasereg;
1394                                 temp2->inst_imm = ins->inst_offset;
1395                                 temp2->dreg = mono_regstate_next_int (cfg->rs);
1396                         }
1397                         else {
1398                                 NEW_INS (cfg, temp, OP_I8CONST);
1399                                 temp->inst_c0 = ins->inst_offset;
1400                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1401
1402                                 NEW_INS (cfg, temp2, OP_LADD);
1403                                 temp2->sreg1 = ins->inst_destbasereg;
1404                                 temp2->sreg2 = temp->dreg;
1405                                 temp2->dreg = mono_regstate_next_int (cfg->rs);
1406                         }
1407
1408                         switch (ins->opcode) {
1409                         case OP_STOREI1_MEMBASE_IMM:
1410                                 ins->opcode = OP_STOREI1_MEMBASE_REG;
1411                                 break;
1412                         case OP_STOREI2_MEMBASE_IMM:
1413                                 ins->opcode = OP_STOREI2_MEMBASE_REG;
1414                                 break;
1415                         case OP_STOREI4_MEMBASE_IMM:
1416                                 ins->opcode = OP_STOREI4_MEMBASE_REG;
1417                                 break;
1418                         case OP_STOREI8_MEMBASE_IMM:
1419                         case OP_STORE_MEMBASE_IMM:
1420                                 ins->opcode = OP_STOREI8_MEMBASE_REG;
1421                                 break;
1422                         default:
1423                                 g_assert_not_reached ();
1424                         }
1425
1426                         if (ins->inst_imm == 0)
1427                                 ins->sreg1 = IA64_R0;
1428                         else {
1429                                 NEW_INS (cfg, temp3, OP_I8CONST);
1430                                 temp3->inst_c0 = ins->inst_imm;
1431                                 temp3->dreg = mono_regstate_next_int (cfg->rs);
1432                                 ins->sreg1 = temp3->dreg;
1433                         }
1434
1435                         ins->inst_offset = 0;
1436                         if (temp2)
1437                                 ins->inst_destbasereg = temp2->dreg;
1438                         break;
1439                 case OP_STOREI1_MEMBASE_REG:
1440                 case OP_STOREI2_MEMBASE_REG:
1441                 case OP_STOREI4_MEMBASE_REG:
1442                 case OP_STOREI8_MEMBASE_REG:
1443                 case OP_STORER4_MEMBASE_REG:
1444                 case OP_STORER8_MEMBASE_REG:
1445                 case OP_STORE_MEMBASE_REG:
1446                         /* There are no store_membase instructions on ia64 */
1447                         if (ins->inst_offset == 0) {
1448                                 break;
1449                         }
1450                         else if (ia64_is_imm14 (ins->inst_offset)) {
1451                                 NEW_INS (cfg, temp2, OP_ADD_IMM);
1452                                 temp2->sreg1 = ins->inst_destbasereg;
1453                                 temp2->inst_imm = ins->inst_offset;
1454                                 temp2->dreg = mono_regstate_next_int (cfg->rs);
1455                         }
1456                         else {
1457                                 NEW_INS (cfg, temp, OP_I8CONST);
1458                                 temp->inst_c0 = ins->inst_offset;
1459                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1460                                 NEW_INS (cfg, temp2, OP_LADD);
1461                                 temp2->sreg1 = ins->inst_destbasereg;
1462                                 temp2->sreg2 = temp->dreg;
1463                                 temp2->dreg = mono_regstate_next_int (cfg->rs);
1464                         }
1465
1466                         ins->inst_offset = 0;
1467                         ins->inst_destbasereg = temp2->dreg;
1468                         break;
1469                 case OP_LOADI1_MEMBASE:
1470                 case OP_LOADU1_MEMBASE:
1471                 case OP_LOADI2_MEMBASE:
1472                 case OP_LOADU2_MEMBASE:
1473                 case OP_LOADI4_MEMBASE:
1474                 case OP_LOADU4_MEMBASE:
1475                 case OP_LOADI8_MEMBASE:
1476                 case OP_LOAD_MEMBASE:
1477                 case OP_LOADR4_MEMBASE:
1478                 case OP_LOADR8_MEMBASE:
1479                 case OP_ATOMIC_EXCHANGE_I4:
1480                 case OP_ATOMIC_EXCHANGE_I8:
1481                 case OP_ATOMIC_ADD_NEW_I4:
1482                 case OP_ATOMIC_ADD_NEW_I8:
1483                 case OP_ATOMIC_ADD_IMM_NEW_I4:
1484                 case OP_ATOMIC_ADD_IMM_NEW_I8:
1485                         /* There are no membase instructions on ia64 */
1486                         if (ins->inst_offset == 0) {
1487                                 break;
1488                         }
1489                         else if (ia64_is_imm14 (ins->inst_offset)) {
1490                                 NEW_INS (cfg, temp2, OP_ADD_IMM);
1491                                 temp2->sreg1 = ins->inst_basereg;
1492                                 temp2->inst_imm = ins->inst_offset;
1493                                 temp2->dreg = mono_regstate_next_int (cfg->rs);
1494                         }
1495                         else {
1496                                 NEW_INS (cfg, temp, OP_I8CONST);
1497                                 temp->inst_c0 = ins->inst_offset;
1498                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1499                                 NEW_INS (cfg, temp2, OP_LADD);
1500                                 temp2->sreg1 = ins->inst_basereg;
1501                                 temp2->sreg2 = temp->dreg;
1502                                 temp2->dreg = mono_regstate_next_int (cfg->rs);
1503                         }
1504
1505                         ins->inst_offset = 0;
1506                         ins->inst_basereg = temp2->dreg;
1507                         break;
1508                 case OP_ADD_IMM:
1509                 case OP_IADD_IMM:
1510                 case OP_LADD_IMM:
1511                 case OP_ISUB_IMM:
1512                 case OP_LSUB_IMM:
1513                 case OP_AND_IMM:
1514                 case OP_IAND_IMM:
1515                 case OP_LAND_IMM:
1516                 case OP_IOR_IMM:
1517                 case OP_LOR_IMM:
1518                 case OP_IXOR_IMM:
1519                 case OP_LXOR_IMM:
1520                 case OP_SHL_IMM:
1521                 case OP_SHR_IMM:
1522                 case OP_ISHL_IMM:
1523                 case OP_LSHL_IMM:
1524                 case OP_ISHR_IMM:
1525                 case OP_LSHR_IMM:
1526                 case OP_ISHR_UN_IMM:
1527                 case OP_LSHR_UN_IMM: {
1528                         gboolean is_imm = FALSE;
1529                         gboolean switched = FALSE;
1530
1531                         if (ins->opcode == OP_AND_IMM && ins->inst_imm == 255) {
1532                                 ins->opcode = OP_ZEXT_I1;
1533                                 break;
1534                         }
1535
1536                         switch (ins->opcode) {
1537                         case OP_ADD_IMM:
1538                         case OP_IADD_IMM:
1539                         case OP_LADD_IMM:
1540                                 is_imm = ia64_is_imm14 (ins->inst_imm);
1541                                 switched = TRUE;
1542                                 break;
1543                         case OP_ISUB_IMM:
1544                         case OP_LSUB_IMM:
1545                                 is_imm = ia64_is_imm14 (- (ins->inst_imm));
1546                                 if (is_imm) {
1547                                         /* A = B - IMM -> A = B + (-IMM) */
1548                                         ins->inst_imm = - ins->inst_imm;
1549                                         ins->opcode = OP_IADD_IMM;
1550                                 }
1551                                 switched = TRUE;
1552                                 break;
1553                         case OP_IAND_IMM:
1554                         case OP_IOR_IMM:
1555                         case OP_IXOR_IMM:
1556                         case OP_AND_IMM:
1557                         case OP_LAND_IMM:
1558                         case OP_LOR_IMM:
1559                         case OP_LXOR_IMM:
1560                                 is_imm = ia64_is_imm8 (ins->inst_imm);
1561                                 switched = TRUE;
1562                                 break;
1563                         case OP_SHL_IMM:
1564                         case OP_SHR_IMM:
1565                         case OP_ISHL_IMM:
1566                         case OP_LSHL_IMM:
1567                         case OP_ISHR_IMM:
1568                         case OP_LSHR_IMM:
1569                         case OP_ISHR_UN_IMM:
1570                         case OP_LSHR_UN_IMM:
1571                                 is_imm = (ins->inst_imm >= 0) && (ins->inst_imm < 64);
1572                                 break;
1573                         default:
1574                                 break;
1575                         }
1576
1577                         if (is_imm) {
1578                                 if (switched)
1579                                         ins->sreg2 = ins->sreg1;
1580                                 break;
1581                         }
1582
1583                         ins->opcode = mono_op_imm_to_op (ins->opcode);
1584
1585                         if (ins->inst_imm == 0)
1586                                 ins->sreg2 = IA64_R0;
1587                         else {
1588                                 NEW_INS (cfg, temp, OP_I8CONST);
1589                                 temp->inst_c0 = ins->inst_imm;
1590                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1591                                 ins->sreg2 = temp->dreg;
1592                         }
1593                         break;
1594                 }
1595                 case OP_COMPARE_IMM:
1596                 case OP_ICOMPARE_IMM:
1597                 case OP_LCOMPARE_IMM: {
1598                         /* Instead of compare+b<cond>, ia64 has compare<cond>+br */
1599                         gboolean imm;
1600                         CompRelation cond;
1601
1602                         next = ins->next;
1603
1604                         /* Branch opts can eliminate the branch */
1605                         if (!next || (!(MONO_IS_COND_BRANCH_OP (next) || MONO_IS_COND_EXC (next) || MONO_IS_SETCC (next)))) {
1606                                 ins->opcode = OP_NOP;
1607                                 break;
1608                         }
1609
1610                         /* 
1611                          * The compare_imm instructions have switched up arguments, and 
1612                          * some of them take an imm between -127 and 128.
1613                          */
1614                         next = ins->next;
1615                         cond = mono_opcode_to_cond (next->opcode);
1616                         if ((cond == CMP_LT) || (cond == CMP_GE))
1617                                 imm = ia64_is_imm8 (ins->inst_imm - 1);
1618                         else if ((cond == CMP_LT_UN) || (cond == CMP_GE_UN))
1619                                 imm = ia64_is_imm8 (ins->inst_imm - 1) && (ins->inst_imm > 0);
1620                         else
1621                                 imm = ia64_is_imm8 (ins->inst_imm);
1622
1623                         if (imm) {
1624                                 ins->opcode = opcode_to_ia64_cmp_imm (next->opcode, ins->opcode);
1625                                 ins->sreg2 = ins->sreg1;
1626                         }
1627                         else {
1628                                 ins->opcode = opcode_to_ia64_cmp (next->opcode, ins->opcode);
1629
1630                                 if (ins->inst_imm == 0)
1631                                         ins->sreg2 = IA64_R0;
1632                                 else {
1633                                         NEW_INS (cfg, temp, OP_I8CONST);
1634                                         temp->inst_c0 = ins->inst_imm;
1635                                         temp->dreg = mono_regstate_next_int (cfg->rs);
1636                                         ins->sreg2 = temp->dreg;
1637                                 }
1638                         }
1639
1640                         if (MONO_IS_COND_BRANCH_OP (next)) {
1641                                 next->opcode = OP_IA64_BR_COND;
1642                                 if (! (next->flags & MONO_INST_BRLABEL))
1643                                         next->inst_target_bb = next->inst_true_bb;
1644                         } else if (MONO_IS_COND_EXC (next)) {
1645                                 next->opcode = OP_IA64_COND_EXC;
1646                         } else if (MONO_IS_SETCC (next)) {
1647                                 next->opcode = OP_IA64_CSET;
1648                         } else {
1649                                 printf ("%s\n", mono_inst_name (next->opcode));
1650                                 NOT_IMPLEMENTED;
1651                         }
1652
1653                         break;
1654                 }
1655                 case OP_COMPARE:
1656                 case OP_ICOMPARE:
1657                 case OP_LCOMPARE:
1658                 case OP_FCOMPARE: {
1659                         /* Instead of compare+b<cond>, ia64 has compare<cond>+br */
1660
1661                         next = ins->next;
1662
1663                         /* Branch opts can eliminate the branch */
1664                         if (!next || (!(MONO_IS_COND_BRANCH_OP (next) || MONO_IS_COND_EXC (next) || MONO_IS_SETCC (next)))) {
1665                                 ins->opcode = OP_NOP;
1666                                 break;
1667                         }
1668
1669                         ins->opcode = opcode_to_ia64_cmp (next->opcode, ins->opcode);
1670
1671                         if (MONO_IS_COND_BRANCH_OP (next)) {
1672                                 next->opcode = OP_IA64_BR_COND;
1673                                 if (! (next->flags & MONO_INST_BRLABEL))
1674                                         next->inst_target_bb = next->inst_true_bb;
1675                         } else if (MONO_IS_COND_EXC (next)) {
1676                                 next->opcode = OP_IA64_COND_EXC;
1677                         } else if (MONO_IS_SETCC (next)) {
1678                                 next->opcode = OP_IA64_CSET;
1679                         } else {
1680                                 printf ("%s\n", mono_inst_name (next->opcode));
1681                                 NOT_IMPLEMENTED;
1682                         }
1683
1684                         break;
1685                 }
1686                 case OP_FCEQ:
1687                 case OP_FCGT:
1688                 case OP_FCGT_UN:
1689                 case OP_FCLT:
1690                 case OP_FCLT_UN:
1691                         /* The front end removes the fcompare, so introduce it again */
1692                         NEW_INS (cfg, temp, opcode_to_ia64_cmp (ins->opcode, OP_FCOMPARE));
1693                         temp->sreg1 = ins->sreg1;
1694                         temp->sreg2 = ins->sreg2;
1695                         
1696                         ins->opcode = OP_IA64_CSET;
1697                         break;
1698                 case OP_MUL_IMM:
1699                 case OP_LMUL_IMM:
1700                 case OP_IMUL_IMM: {
1701                         int i, sum_reg;
1702                         gboolean found = FALSE;
1703                         int shl_op = ins->opcode == OP_IMUL_IMM ? OP_ISHL_IMM : OP_SHL_IMM;
1704
1705                         /* First the easy cases */
1706                         if (ins->inst_imm == 1) {
1707                                 ins->opcode = OP_MOVE;
1708                                 break;
1709                         }
1710                         for (i = 1; i < 64; ++i)
1711                                 if (ins->inst_imm == (((gint64)1) << i)) {
1712                                         ins->opcode = shl_op;
1713                                         ins->inst_imm = i;
1714                                         found = TRUE;
1715                                         break;
1716                                 }
1717
1718                         /* This could be optimized */
1719                         if (!found) {
1720                                 sum_reg = 0;
1721                                 for (i = 0; i < 64; ++i) {
1722                                         if (ins->inst_imm & (((gint64)1) << i)) {
1723                                                 NEW_INS (cfg, temp, shl_op);
1724                                                 temp->dreg = mono_regstate_next_int (cfg->rs);
1725                                                 temp->sreg1 = ins->sreg1;
1726                                                 temp->inst_imm = i;
1727
1728                                                 if (sum_reg == 0)
1729                                                         sum_reg = temp->dreg;
1730                                                 else {
1731                                                         NEW_INS (cfg, temp2, OP_LADD);
1732                                                         temp2->dreg = mono_regstate_next_int (cfg->rs);
1733                                                         temp2->sreg1 = sum_reg;
1734                                                         temp2->sreg2 = temp->dreg;
1735                                                         sum_reg = temp2->dreg;
1736                                                 }
1737                                         }
1738                                 }
1739                                 ins->opcode = OP_MOVE;
1740                                 ins->sreg1 = sum_reg;
1741                         }
1742                         break;
1743                 }
1744                 case OP_LCONV_TO_OVF_U4:
1745                         NEW_INS (cfg, temp, OP_IA64_CMP4_LT);
1746                         temp->sreg1 = ins->sreg1;
1747                         temp->sreg2 = IA64_R0;
1748
1749                         NEW_INS (cfg, temp, OP_IA64_COND_EXC);
1750                         temp->inst_p1 = (char*)"OverflowException";
1751
1752                         ins->opcode = OP_MOVE;
1753                         break;
1754                 case OP_LCONV_TO_OVF_I4_UN:
1755                         NEW_INS (cfg, temp, OP_ICONST);
1756                         temp->inst_c0 = 0x7fffffff;
1757                         temp->dreg = mono_regstate_next_int (cfg->rs);
1758
1759                         NEW_INS (cfg, temp2, OP_IA64_CMP4_GT_UN);
1760                         temp2->sreg1 = ins->sreg1;
1761                         temp2->sreg2 = temp->dreg;
1762
1763                         NEW_INS (cfg, temp, OP_IA64_COND_EXC);
1764                         temp->inst_p1 = (char*)"OverflowException";
1765
1766                         ins->opcode = OP_MOVE;
1767                         break;
1768                 case OP_FCONV_TO_I4:
1769                 case OP_FCONV_TO_I2:
1770                 case OP_FCONV_TO_U2:
1771                 case OP_FCONV_TO_I1:
1772                 case OP_FCONV_TO_U1:
1773                         NEW_INS (cfg, temp, OP_FCONV_TO_I8);
1774                         temp->sreg1 = ins->sreg1;
1775                         temp->dreg = ins->dreg;
1776
1777                         switch (ins->opcode) {
1778                         case OP_FCONV_TO_I4:
1779                                 ins->opcode = OP_SEXT_I4;
1780                                 break;
1781                         case OP_FCONV_TO_I2:
1782                                 ins->opcode = OP_SEXT_I2;
1783                                 break;
1784                         case OP_FCONV_TO_U2:
1785                                 ins->opcode = OP_ZEXT_I4;
1786                                 break;
1787                         case OP_FCONV_TO_I1:
1788                                 ins->opcode = OP_SEXT_I1;
1789                                 break;
1790                         case OP_FCONV_TO_U1:
1791                                 ins->opcode = OP_ZEXT_I1;
1792                                 break;
1793                         default:
1794                                 g_assert_not_reached ();
1795                         }
1796                         ins->sreg1 = ins->dreg;
1797                         break;
1798                 default:
1799                         break;
1800                 }
1801                 last_ins = ins;
1802                 ins = ins->next;
1803         }
1804         bb->last_ins = last_ins;
1805
1806         bb->max_vreg = cfg->rs->next_vreg;
1807 }
1808
1809 /*
1810  * emit_load_volatile_arguments:
1811  *
1812  *  Load volatile arguments from the stack to the original input registers.
1813  * Required before a tail call.
1814  */
1815 static Ia64CodegenState
1816 emit_load_volatile_arguments (MonoCompile *cfg, Ia64CodegenState code)
1817 {
1818         MonoMethod *method = cfg->method;
1819         MonoMethodSignature *sig;
1820         MonoInst *ins;
1821         CallInfo *cinfo;
1822         guint32 i;
1823
1824         /* FIXME: Generate intermediate code instead */
1825
1826         sig = mono_method_signature (method);
1827
1828         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
1829         
1830         /* This is the opposite of the code in emit_prolog */
1831         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
1832                 ArgInfo *ainfo = cinfo->args + i;
1833                 gint32 stack_offset;
1834                 MonoType *arg_type;
1835
1836                 ins = cfg->args [i];
1837
1838                 if (sig->hasthis && (i == 0))
1839                         arg_type = &mono_defaults.object_class->byval_arg;
1840                 else
1841                         arg_type = sig->params [i - sig->hasthis];
1842
1843                 arg_type = mono_type_get_underlying_type (arg_type);
1844
1845                 stack_offset = ainfo->offset + ARGS_OFFSET;
1846
1847                 /* Save volatile arguments to the stack */
1848                 if (ins->opcode != OP_REGVAR) {
1849                         switch (ainfo->storage) {
1850                         case ArgInIReg:
1851                         case ArgInFloatReg:
1852                                 /* FIXME: big offsets */
1853                                 g_assert (ins->opcode == OP_REGOFFSET);
1854                                 ia64_adds_imm (code, GP_SCRATCH_REG, ins->inst_offset, ins->inst_basereg);
1855                                 if (arg_type->byref)
1856                                         ia64_ld8 (code, cfg->arch.reg_in0 + ainfo->reg, GP_SCRATCH_REG);
1857                                 else {
1858                                         switch (arg_type->type) {
1859                                         case MONO_TYPE_R4:
1860                                                 ia64_ldfs (code, ainfo->reg, GP_SCRATCH_REG);
1861                                                 break;
1862                                         case MONO_TYPE_R8:
1863                                                 ia64_ldfd (code, ainfo->reg, GP_SCRATCH_REG);
1864                                                 break;
1865                                         default:
1866                                                 ia64_ld8 (code, cfg->arch.reg_in0 + ainfo->reg, GP_SCRATCH_REG);
1867                                                 break;
1868                                         }
1869                                 }
1870                                 break;
1871                         case ArgOnStack:
1872                                 break;
1873                         default:
1874                                 NOT_IMPLEMENTED;
1875                         }
1876                 }
1877
1878                 if (ins->opcode == OP_REGVAR) {
1879                         /* Argument allocated to (non-volatile) register */
1880                         switch (ainfo->storage) {
1881                         case ArgInIReg:
1882                                 if (ins->dreg != cfg->arch.reg_in0 + ainfo->reg)
1883                                         ia64_mov (code, cfg->arch.reg_in0 + ainfo->reg, ins->dreg);
1884                                 break;
1885                         case ArgOnStack:
1886                                 ia64_adds_imm (code, GP_SCRATCH_REG, 16 + ainfo->offset, cfg->frame_reg);
1887                                 ia64_st8 (code, GP_SCRATCH_REG, ins->dreg);
1888                                 break;
1889                         default:
1890                                 NOT_IMPLEMENTED;
1891                         }
1892                 }
1893         }
1894
1895         return code;
1896 }
1897
1898 static Ia64CodegenState
1899 emit_move_return_value (MonoCompile *cfg, MonoInst *ins, Ia64CodegenState code)
1900 {
1901         CallInfo *cinfo;
1902         int i;
1903
1904         /* Move return value to the target register */
1905         switch (ins->opcode) {
1906         case OP_VOIDCALL:
1907         case OP_VOIDCALL_REG:
1908         case OP_VOIDCALL_MEMBASE:
1909                 break;
1910         case OP_CALL:
1911         case OP_CALL_REG:
1912         case OP_CALL_MEMBASE:
1913         case OP_LCALL:
1914         case OP_LCALL_REG:
1915         case OP_LCALL_MEMBASE:
1916                 g_assert (ins->dreg == IA64_R8);
1917                 break;
1918         case OP_FCALL:
1919         case OP_FCALL_REG:
1920         case OP_FCALL_MEMBASE:
1921                 g_assert (ins->dreg == 8);
1922                 if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4)
1923                         ia64_fnorm_d_sf (code, ins->dreg, ins->dreg, 0);
1924                 break;
1925         case OP_VCALL:
1926         case OP_VCALL_REG:
1927         case OP_VCALL_MEMBASE:
1928         case OP_VCALL2:
1929         case OP_VCALL2_REG:
1930         case OP_VCALL2_MEMBASE: {
1931                 ArgStorage storage;
1932
1933                 cinfo = get_call_info (cfg, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
1934                 storage = cinfo->ret.storage;
1935
1936                 if (storage == ArgAggregate) {
1937                         MonoInst *local = (MonoInst*)cfg->arch.ret_var_addr_local;
1938
1939                         /* Load address of stack space allocated for the return value */
1940                         ia64_movl (code, GP_SCRATCH_REG, local->inst_offset);
1941                         ia64_add (code, GP_SCRATCH_REG, GP_SCRATCH_REG, local->inst_basereg);
1942                         ia64_ld8 (code, GP_SCRATCH_REG, GP_SCRATCH_REG);
1943
1944                         for (i = 0; i < cinfo->ret.nregs; ++i) {
1945                                 switch (cinfo->ret.atype) {
1946                                 case AggregateNormal:
1947                                         ia64_st8_inc_imm_hint (code, GP_SCRATCH_REG, cinfo->ret.reg + i, 8, 0);
1948                                         break;
1949                                 case AggregateSingleHFA:
1950                                         ia64_stfs_inc_imm_hint (code, GP_SCRATCH_REG, cinfo->ret.reg + i, 4, 0);
1951                                         break;
1952                                 case AggregateDoubleHFA:
1953                                         ia64_stfd_inc_imm_hint (code, GP_SCRATCH_REG, cinfo->ret.reg + i, 8, 0);
1954                                         break;
1955                                 default:
1956                                         g_assert_not_reached ();
1957                                 }
1958                         }
1959                 }
1960                 break;
1961         }
1962         default:
1963                 g_assert_not_reached ();
1964         }
1965
1966         return code;
1967 }
1968
/*
 * add_patch_info:
 *
 *  Register a patch of type PATCH_TYPE with payload DATA through
 * mono_add_patch_info (). The patch location is (code.buf + code.nins)
 * expressed as an offset from cfg->native_code.
 * NOTE(review): nins presumably counts the instructions not yet flushed into
 * a bundle, making the location identify a slot rather than a plain byte
 * address - confirm against ia64-codegen.h.
 *
 *  All macro arguments are parenthesized so arbitrary expressions can be
 * passed for CFG and CODE.
 */
#define add_patch_info(cfg,code,patch_type,data) do { \
        mono_add_patch_info ((cfg), (code).buf + (code).nins - (cfg)->native_code, (patch_type), (data)); \
} while (0)
1972
/*
 * emit_cond_system_exception:
 *
 *  Emit a branch, predicated on PREDICATE, which raises the exception named
 * EXC_NAME. If mono_branch_optimize_exception_target () returns an
 * instruction, the branch is registered as a MONO_PATCH_INFO_BB patch
 * targeting that instruction's inst_true_bb; otherwise it is registered as a
 * MONO_PATCH_INFO_EXC patch for EXC_NAME.
 *
 *  The expansion reads a variable named 'bb' which is not a macro parameter,
 * so one must be in scope wherever the macro is used.
 */
#define emit_cond_system_exception(cfg,code,exc_name,predicate) do { \
        MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
        if (tins != NULL) { \
                add_patch_info (cfg, code, MONO_PATCH_INFO_BB, tins->inst_true_bb); \
        } else { \
                add_patch_info (cfg, code, MONO_PATCH_INFO_EXC, exc_name); \
        } \
        ia64_br_cond_pred (code, (predicate), 0); \
} while (0)
1981
1982 static Ia64CodegenState
1983 emit_call (MonoCompile *cfg, Ia64CodegenState code, guint32 patch_type, gconstpointer data)
1984 {
1985         add_patch_info (cfg, code, patch_type, data);
1986
1987         if ((patch_type == MONO_PATCH_INFO_ABS) || (patch_type == MONO_PATCH_INFO_INTERNAL_METHOD)) {
1988                 /* Indirect call */
1989                 /* mono_arch_patch_callsite will patch this */
1990                 /* mono_arch_nullify_class_init_trampoline will patch this */
1991                 ia64_movl (code, GP_SCRATCH_REG, 0);
1992                 ia64_ld8_inc_imm (code, GP_SCRATCH_REG2, GP_SCRATCH_REG, 8);
1993                 ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG2);
1994                 ia64_ld8 (code, IA64_GP, GP_SCRATCH_REG);
1995                 ia64_br_call_reg (code, IA64_B0, IA64_B6);
1996         }
1997         else {
1998                 /* Can't use a direct call since the displacement might be too small */
1999                 /* mono_arch_patch_callsite will patch this */
2000                 ia64_movl (code, GP_SCRATCH_REG, 0);
2001                 ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG);
2002                 ia64_br_call_reg (code, IA64_B0, IA64_B6);
2003         }
2004
2005         return code;
2006 }
2007
/*
 * bb_is_loop_start: true when BB has both its loop_body_start and nesting
 * fields set. The same macro is already defined near the top of this file;
 * the two definitions have to stay token-for-token identical, otherwise this
 * becomes an incompatible macro redefinition.
 */
2008 #define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
2009
2010 void
2011 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
2012 {
2013         MonoInst *ins;
2014         MonoCallInst *call;
2015         guint offset;
2016         Ia64CodegenState code;
2017         guint8 *code_start = cfg->native_code + cfg->code_len;
2018         MonoInst *last_ins = NULL;
2019         guint last_offset = 0;
2020         int max_len, cpos;
2021
2022         if (cfg->opt & MONO_OPT_LOOP) {
2023                 /* FIXME: */
2024         }
2025
2026         if (cfg->verbose_level > 2)
2027                 g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
2028
2029         cpos = bb->max_offset;
2030
2031         if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
2032                 NOT_IMPLEMENTED;
2033         }
2034
2035         offset = code_start - cfg->native_code;
2036
2037         ia64_codegen_init (code, code_start);
2038
2039 #if 0
2040         if (strstr (cfg->method->name, "conv_ovf_i1") && (bb->block_num == 2))
2041                 break_count ();
2042 #endif
2043
2044         MONO_BB_FOR_EACH_INS (bb, ins) {
2045                 offset = code.buf - cfg->native_code;
2046
2047                 max_len = ((int)(((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN])) + 128;
2048
2049                 while (offset + max_len + 16 > cfg->code_size) {
2050                         ia64_codegen_close (code);
2051
2052                         offset = code.buf - cfg->native_code;
2053
2054                         cfg->code_size *= 2;
2055                         cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
2056                         code_start = cfg->native_code + offset;
2057                         mono_jit_stats.code_reallocs++;
2058
2059                         ia64_codegen_init (code, code_start);
2060                 }
2061
2062                 mono_debug_record_line_number (cfg, ins, offset);
2063
2064                 switch (ins->opcode) {
2065                 case OP_ICONST:
2066                 case OP_I8CONST:
2067                         if (ia64_is_imm14 (ins->inst_c0))
2068                                 ia64_adds_imm (code, ins->dreg, ins->inst_c0, IA64_R0);
2069                         else
2070                                 ia64_movl (code, ins->dreg, ins->inst_c0);
2071                         break;
2072                 case OP_JUMP_TABLE:
2073                         add_patch_info (cfg, code, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2074                         ia64_movl (code, ins->dreg, 0);
2075                         break;
2076                 case OP_MOVE:
2077                         ia64_mov (code, ins->dreg, ins->sreg1);
2078                         break;
2079                 case OP_BR:
2080                 case OP_IA64_BR_COND: {
2081                         int pred = 0;
2082                         if (ins->opcode == OP_IA64_BR_COND)
2083                                 pred = 6;
2084                         if (ins->flags & MONO_INST_BRLABEL) {
2085                                 if (ins->inst_i0->inst_c0) {
2086                                         NOT_IMPLEMENTED;
2087                                 } else {
2088                                         add_patch_info (cfg, code, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2089                                         ia64_br_cond_pred (code, pred, 0);
2090                                 }
2091                         } else {
2092                                 if (ins->inst_target_bb->native_offset) {
2093                                         guint8 *pos = code.buf + code.nins;
2094
2095                                         ia64_br_cond_pred (code, pred, 0);
2096                                         ia64_begin_bundle (code);
2097                                         ia64_patch (pos, cfg->native_code + ins->inst_target_bb->native_offset);
2098                                 } else {
2099                                         add_patch_info (cfg, code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2100                                         ia64_br_cond_pred (code, pred, 0);
2101                                 } 
2102                         }
2103                         break;
2104                 }
2105                 case OP_LABEL:
2106                         ia64_begin_bundle (code);
2107                         ins->inst_c0 = code.buf - cfg->native_code;
2108                         break;
2109                 case OP_NOP:
2110                 case OP_RELAXED_NOP:
2111                 case OP_DUMMY_USE:
2112                 case OP_DUMMY_STORE:
2113                 case OP_NOT_REACHED:
2114                 case OP_NOT_NULL:
2115                         break;
2116                 case OP_BR_REG:
2117                         ia64_mov_to_br (code, IA64_B6, ins->sreg1);
2118                         ia64_br_cond_reg (code, IA64_B6);
2119                         break;
2120                 case OP_IADD:
2121                 case OP_LADD:
2122                         ia64_add (code, ins->dreg, ins->sreg1, ins->sreg2);
2123                         break;
2124                 case OP_ISUB:
2125                 case OP_LSUB:
2126                         ia64_sub (code, ins->dreg, ins->sreg1, ins->sreg2);
2127                         break;
2128                 case OP_IAND:
2129                 case OP_LAND:
2130                         ia64_and (code, ins->dreg, ins->sreg1, ins->sreg2);
2131                         break;
2132                 case OP_IOR:
2133                 case OP_LOR:
2134                         ia64_or (code, ins->dreg, ins->sreg1, ins->sreg2);
2135                         break;
2136                 case OP_IXOR:
2137                 case OP_LXOR:
2138                         ia64_xor (code, ins->dreg, ins->sreg1, ins->sreg2);
2139                         break;
2140                 case OP_INEG:
2141                 case OP_LNEG:
2142                         ia64_sub (code, ins->dreg, IA64_R0, ins->sreg1);
2143                         break;
2144                 case OP_INOT:
2145                 case OP_LNOT:
2146                         ia64_andcm_imm (code, ins->dreg, -1, ins->sreg1);
2147                         break;
2148                 case OP_ISHL:
2149                 case OP_LSHL:
2150                         ia64_shl (code, ins->dreg, ins->sreg1, ins->sreg2);
2151                         break;
2152                 case OP_ISHR:
2153                 case OP_LSHR:
2154                         ia64_shr (code, ins->dreg, ins->sreg1, ins->sreg2);
2155                         break;
2156                 case OP_ISHR_UN:
2157                         ia64_zxt4 (code, GP_SCRATCH_REG, ins->sreg1);
2158                         ia64_shr_u (code, ins->dreg, GP_SCRATCH_REG, ins->sreg2);
2159                         break;
2160                 case OP_LSHR_UN:
2161                         ia64_shr_u (code, ins->dreg, ins->sreg1, ins->sreg2);
2162                         break;
2163                 case OP_IADDCC:
2164                         /* p6 and p7 is set if there is signed/unsigned overflow */
2165                         
2166                         /* Set p8-p9 == (sreg2 > 0) */
2167                         ia64_cmp4_lt (code, 8, 9, IA64_R0, ins->sreg2);
2168
2169                         ia64_add (code, GP_SCRATCH_REG, ins->sreg1, ins->sreg2);
2170                         
2171                         /* (sreg2 > 0) && (res < ins->sreg1) => signed overflow */
2172                         ia64_cmp4_lt_pred (code, 8, 6, 10, GP_SCRATCH_REG, ins->sreg1);
2173                         /* (sreg2 <= 0) && (res > ins->sreg1) => signed overflow */
2174                         ia64_cmp4_lt_pred (code, 9, 6, 10, ins->sreg1, GP_SCRATCH_REG);
2175
2176                         /* res <u sreg1 => unsigned overflow */
2177                         ia64_cmp4_ltu (code, 7, 10, GP_SCRATCH_REG, ins->sreg1);
2178
2179                         /* FIXME: Predicate this since this is a side effect */
2180                         ia64_mov (code, ins->dreg, GP_SCRATCH_REG);
2181                         break;
2182                 case OP_ISUBCC:
2183                         /* p6 and p7 is set if there is signed/unsigned overflow */
2184                         
2185                         /* Set p8-p9 == (sreg2 > 0) */
2186                         ia64_cmp4_lt (code, 8, 9, IA64_R0, ins->sreg2);
2187
2188                         ia64_sub (code, GP_SCRATCH_REG, ins->sreg1, ins->sreg2);
2189                         
2190                         /* (sreg2 > 0) && (res > ins->sreg1) => signed overflow */
2191                         ia64_cmp4_gt_pred (code, 8, 6, 10, GP_SCRATCH_REG, ins->sreg1);
2192                         /* (sreg2 <= 0) && (res < ins->sreg1) => signed overflow */
2193                         ia64_cmp4_lt_pred (code, 9, 6, 10, GP_SCRATCH_REG, ins->sreg1);
2194
2195                         /* sreg1 <u sreg2 => unsigned overflow */
2196                         ia64_cmp4_ltu (code, 7, 10, ins->sreg1, ins->sreg2);
2197
2198                         /* FIXME: Predicate this since this is a side effect */
2199                         ia64_mov (code, ins->dreg, GP_SCRATCH_REG);
2200                         break;
2201                 case OP_ADDCC:
2202                         /* Same as OP_IADDCC */
2203                         ia64_cmp_lt (code, 8, 9, IA64_R0, ins->sreg2);
2204
2205                         ia64_add (code, GP_SCRATCH_REG, ins->sreg1, ins->sreg2);
2206                         
2207                         ia64_cmp_lt_pred (code, 8, 6, 10, GP_SCRATCH_REG, ins->sreg1);
2208                         ia64_cmp_lt_pred (code, 9, 6, 10, ins->sreg1, GP_SCRATCH_REG);
2209
2210                         ia64_cmp_ltu (code, 7, 10, GP_SCRATCH_REG, ins->sreg1);
2211
2212                         ia64_mov (code, ins->dreg, GP_SCRATCH_REG);
2213                         break;
2214                 case OP_SUBCC:
2215                         /* Same as OP_ISUBCC */
2216
2217                         ia64_cmp_lt (code, 8, 9, IA64_R0, ins->sreg2);
2218
2219                         ia64_sub (code, GP_SCRATCH_REG, ins->sreg1, ins->sreg2);
2220                         
2221                         ia64_cmp_gt_pred (code, 8, 6, 10, GP_SCRATCH_REG, ins->sreg1);
2222                         ia64_cmp_lt_pred (code, 9, 6, 10, GP_SCRATCH_REG, ins->sreg1);
2223
2224                         ia64_cmp_ltu (code, 7, 10, ins->sreg1, ins->sreg2);
2225
2226                         ia64_mov (code, ins->dreg, GP_SCRATCH_REG);
2227                         break;
2228                 case OP_ADD_IMM:
2229                 case OP_IADD_IMM:
2230                 case OP_LADD_IMM:
2231                         ia64_adds_imm (code, ins->dreg, ins->inst_imm, ins->sreg1);
2232                         break;
2233                 case OP_IAND_IMM:
2234                 case OP_AND_IMM:
2235                 case OP_LAND_IMM:
2236                         ia64_and_imm (code, ins->dreg, ins->inst_imm, ins->sreg1);
2237                         break;
2238                 case OP_IOR_IMM:
2239                 case OP_LOR_IMM:
2240                         ia64_or_imm (code, ins->dreg, ins->inst_imm, ins->sreg1);
2241                         break;
2242                 case OP_IXOR_IMM:
2243                 case OP_LXOR_IMM:
2244                         ia64_xor_imm (code, ins->dreg, ins->inst_imm, ins->sreg1);
2245                         break;
2246                 case OP_SHL_IMM:
2247                 case OP_ISHL_IMM:
2248                 case OP_LSHL_IMM:
2249                         ia64_shl_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2250                         break;
2251                 case OP_SHR_IMM:
2252                 case OP_ISHR_IMM:
2253                 case OP_LSHR_IMM:
2254                         ia64_shr_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2255                         break;
2256                 case OP_ISHR_UN_IMM:
2257                         ia64_zxt4 (code, GP_SCRATCH_REG, ins->sreg1);
2258                         ia64_shr_u_imm (code, ins->dreg, GP_SCRATCH_REG, ins->inst_imm);
2259                         break;
2260                 case OP_LSHR_UN_IMM:
2261                         ia64_shr_u_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2262                         break;
2263                 case OP_LMUL:
2264                         /* Based on gcc code */
2265                         ia64_setf_sig (code, FP_SCRATCH_REG, ins->sreg1);
2266                         ia64_setf_sig (code, FP_SCRATCH_REG2, ins->sreg2);
2267                         ia64_xmpy_l (code, FP_SCRATCH_REG, FP_SCRATCH_REG, FP_SCRATCH_REG2);
2268                         ia64_getf_sig (code, ins->dreg, FP_SCRATCH_REG);
2269                         break;
2270
2271                 case OP_STOREI1_MEMBASE_REG:
2272                         ia64_st1_hint (code, ins->inst_destbasereg, ins->sreg1, 0);
2273                         break;
2274                 case OP_STOREI2_MEMBASE_REG:
2275                         ia64_st2_hint (code, ins->inst_destbasereg, ins->sreg1, 0);
2276                         break;
2277                 case OP_STOREI4_MEMBASE_REG:
2278                         ia64_st4_hint (code, ins->inst_destbasereg, ins->sreg1, 0);
2279                         break;
2280                 case OP_STOREI8_MEMBASE_REG:
2281                 case OP_STORE_MEMBASE_REG:
2282                         if (ins->inst_offset != 0) {
2283                                 /* This is generated by local regalloc */
2284                                 if (ia64_is_imm14 (ins->inst_offset)) {
2285                                         ia64_adds_imm (code, GP_SCRATCH_REG, ins->inst_offset, ins->inst_destbasereg);
2286                                 } else {
2287                                         ia64_movl (code, GP_SCRATCH_REG, ins->inst_offset);
2288                                         ia64_add (code, GP_SCRATCH_REG, GP_SCRATCH_REG, ins->inst_destbasereg);
2289                                 }
2290                                 ins->inst_destbasereg = GP_SCRATCH_REG;
2291                         }
2292                         ia64_st8_hint (code, ins->inst_destbasereg, ins->sreg1, 0);
2293                         break;
2294
2295                 case OP_IA64_STOREI1_MEMBASE_INC_REG:
2296                         ia64_st1_inc_imm_hint (code, ins->inst_destbasereg, ins->sreg1, 1, 0);
2297                         break;
2298                 case OP_IA64_STOREI2_MEMBASE_INC_REG:
2299                         ia64_st2_inc_imm_hint (code, ins->inst_destbasereg, ins->sreg1, 2, 0);
2300                         break;
2301                 case OP_IA64_STOREI4_MEMBASE_INC_REG:
2302                         ia64_st4_inc_imm_hint (code, ins->inst_destbasereg, ins->sreg1, 4, 0);
2303                         break;
2304                 case OP_IA64_STOREI8_MEMBASE_INC_REG:
2305                         ia64_st8_inc_imm_hint (code, ins->inst_destbasereg, ins->sreg1, 8, 0);
2306                         break;
2307
2308                 case OP_LOADU1_MEMBASE:
2309                         ia64_ld1 (code, ins->dreg, ins->inst_basereg);
2310                         break;
2311                 case OP_LOADU2_MEMBASE:
2312                         ia64_ld2 (code, ins->dreg, ins->inst_basereg);
2313                         break;
2314                 case OP_LOADU4_MEMBASE:
2315                         ia64_ld4 (code, ins->dreg, ins->inst_basereg);
2316                         break;
2317                 case OP_LOADI1_MEMBASE:
2318                         ia64_ld1 (code, ins->dreg, ins->inst_basereg);
2319                         ia64_sxt1 (code, ins->dreg, ins->dreg);
2320                         break;
2321                 case OP_LOADI2_MEMBASE:
2322                         ia64_ld2 (code, ins->dreg, ins->inst_basereg);
2323                         ia64_sxt2 (code, ins->dreg, ins->dreg);
2324                         break;
2325                 case OP_LOADI4_MEMBASE:
2326                         ia64_ld4 (code, ins->dreg, ins->inst_basereg);
2327                         ia64_sxt4 (code, ins->dreg, ins->dreg);
2328                         break;
2329                 case OP_LOAD_MEMBASE:
2330                 case OP_LOADI8_MEMBASE:
2331                         if (ins->inst_offset != 0) {
2332                                 /* This is generated by local regalloc */
2333                                 if (ia64_is_imm14 (ins->inst_offset)) {
2334                                         ia64_adds_imm (code, GP_SCRATCH_REG, ins->inst_offset, ins->inst_basereg);
2335                                 } else {
2336                                         ia64_movl (code, GP_SCRATCH_REG, ins->inst_offset);
2337                                         ia64_add (code, GP_SCRATCH_REG, GP_SCRATCH_REG, ins->inst_basereg);
2338                                 }
2339                                 ins->inst_basereg = GP_SCRATCH_REG;
2340                         }
2341                         ia64_ld8 (code, ins->dreg, ins->inst_basereg);
2342                         break;
2343
2344                 case OP_IA64_LOADU1_MEMBASE_INC:
2345                         ia64_ld1_inc_imm_hint (code, ins->dreg, ins->inst_basereg, 1, 0);
2346                         break;
2347                 case OP_IA64_LOADU2_MEMBASE_INC:
2348                         ia64_ld2_inc_imm_hint (code, ins->dreg, ins->inst_basereg, 2, 0);
2349                         break;
2350                 case OP_IA64_LOADU4_MEMBASE_INC:
2351                         ia64_ld4_inc_imm_hint (code, ins->dreg, ins->inst_basereg, 4, 0);
2352                         break;
2353                 case OP_IA64_LOADI8_MEMBASE_INC:
2354                         ia64_ld8_inc_imm_hint (code, ins->dreg, ins->inst_basereg, 8, 0);
2355                         break;
2356
2357                 case OP_SEXT_I1:
2358                         ia64_sxt1 (code, ins->dreg, ins->sreg1);
2359                         break;
2360                 case OP_SEXT_I2:
2361                         ia64_sxt2 (code, ins->dreg, ins->sreg1);
2362                         break;
2363                 case OP_SEXT_I4:
2364                         ia64_sxt4 (code, ins->dreg, ins->sreg1);
2365                         break;
2366                 case OP_ZEXT_I1:
2367                         ia64_zxt1 (code, ins->dreg, ins->sreg1);
2368                         break;
2369                 case OP_ZEXT_I2:
2370                         ia64_zxt2 (code, ins->dreg, ins->sreg1);
2371                         break;
2372                 case OP_ZEXT_I4:
2373                         ia64_zxt4 (code, ins->dreg, ins->sreg1);
2374                         break;
2375
2376                         /* Compare opcodes */
2377                 case OP_IA64_CMP4_EQ:
2378                         ia64_cmp4_eq (code, 6, 7, ins->sreg1, ins->sreg2);
2379                         break;
2380                 case OP_IA64_CMP4_NE:
2381                         ia64_cmp4_ne (code, 6, 7, ins->sreg1, ins->sreg2);
2382                         break;
2383                 case OP_IA64_CMP4_LE:
2384                         ia64_cmp4_le (code, 6, 7, ins->sreg1, ins->sreg2);
2385                         break;
2386                 case OP_IA64_CMP4_LT:
2387                         ia64_cmp4_lt (code, 6, 7, ins->sreg1, ins->sreg2);
2388                         break;
2389                 case OP_IA64_CMP4_GE:
2390                         ia64_cmp4_ge (code, 6, 7, ins->sreg1, ins->sreg2);
2391                         break;
2392                 case OP_IA64_CMP4_GT:
2393                         ia64_cmp4_gt (code, 6, 7, ins->sreg1, ins->sreg2);
2394                         break;
2395                 case OP_IA64_CMP4_LT_UN:
2396                         ia64_cmp4_ltu (code, 6, 7, ins->sreg1, ins->sreg2);
2397                         break;
2398                 case OP_IA64_CMP4_LE_UN:
2399                         ia64_cmp4_leu (code, 6, 7, ins->sreg1, ins->sreg2);
2400                         break;
2401                 case OP_IA64_CMP4_GT_UN:
2402                         ia64_cmp4_gtu (code, 6, 7, ins->sreg1, ins->sreg2);
2403                         break;
2404                 case OP_IA64_CMP4_GE_UN:
2405                         ia64_cmp4_geu (code, 6, 7, ins->sreg1, ins->sreg2);
2406                         break;
2407                 case OP_IA64_CMP_EQ:
2408                         ia64_cmp_eq (code, 6, 7, ins->sreg1, ins->sreg2);
2409                         break;
2410                 case OP_IA64_CMP_NE:
2411                         ia64_cmp_ne (code, 6, 7, ins->sreg1, ins->sreg2);
2412                         break;
2413                 case OP_IA64_CMP_LE:
2414                         ia64_cmp_le (code, 6, 7, ins->sreg1, ins->sreg2);
2415                         break;
2416                 case OP_IA64_CMP_LT:
2417                         ia64_cmp_lt (code, 6, 7, ins->sreg1, ins->sreg2);
2418                         break;
2419                 case OP_IA64_CMP_GE:
2420                         ia64_cmp_ge (code, 6, 7, ins->sreg1, ins->sreg2);
2421                         break;
2422                 case OP_IA64_CMP_GT:
2423                         ia64_cmp_gt (code, 6, 7, ins->sreg1, ins->sreg2);
2424                         break;
2425                 case OP_IA64_CMP_GT_UN:
2426                         ia64_cmp_gtu (code, 6, 7, ins->sreg1, ins->sreg2);
2427                         break;
2428                 case OP_IA64_CMP_LT_UN:
2429                         ia64_cmp_ltu (code, 6, 7, ins->sreg1, ins->sreg2);
2430                         break;
2431                 case OP_IA64_CMP_GE_UN:
2432                         ia64_cmp_geu (code, 6, 7, ins->sreg1, ins->sreg2);
2433                         break;
2434                 case OP_IA64_CMP_LE_UN:
2435                         ia64_cmp_leu (code, 6, 7, ins->sreg1, ins->sreg2);
2436                         break;
2437                 case OP_IA64_CMP4_EQ_IMM:
2438                         ia64_cmp4_eq_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2439                         break;
2440                 case OP_IA64_CMP4_NE_IMM:
2441                         ia64_cmp4_ne_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2442                         break;
2443                 case OP_IA64_CMP4_LE_IMM:
2444                         ia64_cmp4_le_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2445                         break;
2446                 case OP_IA64_CMP4_LT_IMM:
2447                         ia64_cmp4_lt_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2448                         break;
2449                 case OP_IA64_CMP4_GE_IMM:
2450                         ia64_cmp4_ge_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2451                         break;
2452                 case OP_IA64_CMP4_GT_IMM:
2453                         ia64_cmp4_gt_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2454                         break;
2455                 case OP_IA64_CMP4_LT_UN_IMM:
2456                         ia64_cmp4_ltu_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2457                         break;
2458                 case OP_IA64_CMP4_LE_UN_IMM:
2459                         ia64_cmp4_leu_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2460                         break;
2461                 case OP_IA64_CMP4_GT_UN_IMM:
2462                         ia64_cmp4_gtu_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2463                         break;
2464                 case OP_IA64_CMP4_GE_UN_IMM:
2465                         ia64_cmp4_geu_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2466                         break;
2467                 case OP_IA64_CMP_EQ_IMM:
2468                         ia64_cmp_eq_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2469                         break;
2470                 case OP_IA64_CMP_NE_IMM:
2471                         ia64_cmp_ne_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2472                         break;
2473                 case OP_IA64_CMP_LE_IMM:
2474                         ia64_cmp_le_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2475                         break;
2476                 case OP_IA64_CMP_LT_IMM:
2477                         ia64_cmp_lt_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2478                         break;
2479                 case OP_IA64_CMP_GE_IMM:
2480                         ia64_cmp_ge_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2481                         break;
2482                 case OP_IA64_CMP_GT_IMM:
2483                         ia64_cmp_gt_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2484                         break;
2485                 case OP_IA64_CMP_GT_UN_IMM:
2486                         ia64_cmp_gtu_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2487                         break;
2488                 case OP_IA64_CMP_LT_UN_IMM:
2489                         ia64_cmp_ltu_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2490                         break;
2491                 case OP_IA64_CMP_GE_UN_IMM:
2492                         ia64_cmp_geu_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2493                         break;
2494                 case OP_IA64_CMP_LE_UN_IMM:
2495                         ia64_cmp_leu_imm (code, 6, 7, ins->inst_imm, ins->sreg2);
2496                         break;
2497                 case OP_IA64_FCMP_EQ:
2498                         ia64_fcmp_eq_sf (code, 6, 7, ins->sreg1, ins->sreg2, 0);
2499                         break;
2500                 case OP_IA64_FCMP_NE:
2501                         ia64_fcmp_ne_sf (code, 6, 7, ins->sreg1, ins->sreg2, 0);
2502                         break;
2503                 case OP_IA64_FCMP_LT:
2504                         ia64_fcmp_lt_sf (code, 6, 7, ins->sreg1, ins->sreg2, 0);
2505                         break;
2506                 case OP_IA64_FCMP_GT:
2507                         ia64_fcmp_gt_sf (code, 6, 7, ins->sreg1, ins->sreg2, 0);
2508                         break;
2509                 case OP_IA64_FCMP_LE:
2510                         ia64_fcmp_le_sf (code, 6, 7, ins->sreg1, ins->sreg2, 0);
2511                         break;
2512                 case OP_IA64_FCMP_GE:
2513                         ia64_fcmp_ge_sf (code, 6, 7, ins->sreg1, ins->sreg2, 0);
2514                         break;
2515                 case OP_IA64_FCMP_GT_UN:
2516                         ia64_fcmp_gt_sf (code, 6, 7, ins->sreg1, ins->sreg2, 0);
2517                         ia64_fcmp_unord_sf_pred (code, 7, 6, 7, ins->sreg1, ins->sreg2, 0);
2518                         break;
2519                 case OP_IA64_FCMP_LT_UN:
2520                         ia64_fcmp_lt_sf (code, 6, 7, ins->sreg1, ins->sreg2, 0);
2521                         ia64_fcmp_unord_sf_pred (code, 7, 6, 7, ins->sreg1, ins->sreg2, 0);
2522                         break;
2523                 case OP_IA64_FCMP_GE_UN:
2524                         ia64_fcmp_ge_sf (code, 6, 7, ins->sreg1, ins->sreg2, 0);
2525                         ia64_fcmp_unord_sf_pred (code, 7, 6, 7, ins->sreg1, ins->sreg2, 0);
2526                         break;
2527                 case OP_IA64_FCMP_LE_UN:
2528                         ia64_fcmp_le_sf (code, 6, 7, ins->sreg1, ins->sreg2, 0);
2529                         ia64_fcmp_unord_sf_pred (code, 7, 6, 7, ins->sreg1, ins->sreg2, 0);
2530                         break;
2531
2532                 case OP_COND_EXC_IOV:
2533                 case OP_COND_EXC_OV:
2534                         emit_cond_system_exception (cfg, code, "OverflowException", 6);
2535                         break;
2536                 case OP_COND_EXC_IC:
2537                 case OP_COND_EXC_C:
2538                         emit_cond_system_exception (cfg, code, "OverflowException", 7);
2539                         break;
2540                 case OP_IA64_COND_EXC:
2541                         emit_cond_system_exception (cfg, code, ins->inst_p1, 6);
2542                         break;
2543                 case OP_IA64_CSET:
2544                         ia64_mov_pred (code, 7, ins->dreg, IA64_R0);
2545                         ia64_no_stop (code);
2546                         ia64_add1_pred (code, 6, ins->dreg, IA64_R0, IA64_R0);
2547                         break;
2548                 case OP_ICONV_TO_I1:
2549                 case OP_LCONV_TO_I1:
2550                         /* FIXME: Is this needed ? */
2551                         ia64_sxt1 (code, ins->dreg, ins->sreg1);
2552                         break;
2553                 case OP_ICONV_TO_I2:
2554                 case OP_LCONV_TO_I2:
2555                         /* FIXME: Is this needed ? */
2556                         ia64_sxt2 (code, ins->dreg, ins->sreg1);
2557                         break;
2558                 case OP_LCONV_TO_I4:
2559                         /* FIXME: Is this needed ? */
2560                         ia64_sxt4 (code, ins->dreg, ins->sreg1);
2561                         break;
2562                 case OP_ICONV_TO_U1:
2563                 case OP_LCONV_TO_U1:
2564                         /* FIXME: Is this needed */
2565                         ia64_zxt1 (code, ins->dreg, ins->sreg1);
2566                         break;
2567                 case OP_ICONV_TO_U2:
2568                 case OP_LCONV_TO_U2:
2569                         /* FIXME: Is this needed */
2570                         ia64_zxt2 (code, ins->dreg, ins->sreg1);
2571                         break;
2572                 case OP_LCONV_TO_U4:
2573                         /* FIXME: Is this needed */
2574                         ia64_zxt4 (code, ins->dreg, ins->sreg1);
2575                         break;
2576                 case OP_ICONV_TO_I8:
2577                 case OP_ICONV_TO_I:
2578                 case OP_LCONV_TO_I8:
2579                 case OP_LCONV_TO_I:
2580                         ia64_sxt4 (code, ins->dreg, ins->sreg1);
2581                         break;
2582                 case OP_LCONV_TO_U8:
2583                 case OP_LCONV_TO_U:
2584                         ia64_zxt4 (code, ins->dreg, ins->sreg1);
2585                         break;
2586
2587                         /*
2588                          * FLOAT OPCODES
2589                          */
2590                 case OP_R8CONST: {
2591                         double d = *(double *)ins->inst_p0;
2592
2593                         if ((d == 0.0) && (mono_signbit (d) == 0))
2594                                 ia64_fmov (code, ins->dreg, 0);
2595                         else if (d == 1.0)
2596                                 ia64_fmov (code, ins->dreg, 1);
2597                         else {
2598                                 add_patch_info (cfg, code, MONO_PATCH_INFO_R8, ins->inst_p0);
2599                                 ia64_movl (code, GP_SCRATCH_REG, 0);
2600                                 ia64_ldfd (code, ins->dreg, GP_SCRATCH_REG);
2601                         }
2602                         break;
2603                 }
2604                 case OP_R4CONST: {
2605                         float f = *(float *)ins->inst_p0;
2606
2607                         if ((f == 0.0) && (mono_signbit (f) == 0))
2608                                 ia64_fmov (code, ins->dreg, 0);
2609                         else if (f == 1.0)
2610                                 ia64_fmov (code, ins->dreg, 1);
2611                         else {
2612                                 add_patch_info (cfg, code, MONO_PATCH_INFO_R4, ins->inst_p0);
2613                                 ia64_movl (code, GP_SCRATCH_REG, 0);
2614                                 ia64_ldfs (code, ins->dreg, GP_SCRATCH_REG);
2615                         }
2616                         break;
2617                 }
2618                 case OP_FMOVE:
2619                         ia64_fmov (code, ins->dreg, ins->sreg1);
2620                         break;
2621                 case OP_STORER8_MEMBASE_REG:
2622                         if (ins->inst_offset != 0) {
2623                                 /* This is generated by local regalloc */
2624                                 if (ia64_is_imm14 (ins->inst_offset)) {
2625                                         ia64_adds_imm (code, GP_SCRATCH_REG, ins->inst_offset, ins->inst_destbasereg);
2626                                 } else {
2627                                         ia64_movl (code, GP_SCRATCH_REG, ins->inst_offset);
2628                                         ia64_add (code, GP_SCRATCH_REG, GP_SCRATCH_REG, ins->inst_destbasereg);
2629                                 }
2630                                 ins->inst_destbasereg = GP_SCRATCH_REG;
2631                         }
2632                         ia64_stfd_hint (code, ins->inst_destbasereg, ins->sreg1, 0);
2633                         break;
2634                 case OP_STORER4_MEMBASE_REG:
2635                         ia64_fnorm_s_sf (code, FP_SCRATCH_REG, ins->sreg1, 0);
2636                         ia64_stfs_hint (code, ins->inst_destbasereg, FP_SCRATCH_REG, 0);
2637                         break;
2638                 case OP_LOADR8_MEMBASE:
2639                         if (ins->inst_offset != 0) {
2640                                 /* This is generated by local regalloc */
2641                                 if (ia64_is_imm14 (ins->inst_offset)) {
2642                                         ia64_adds_imm (code, GP_SCRATCH_REG, ins->inst_offset, ins->inst_basereg);
2643                                 } else {
2644                                         ia64_movl (code, GP_SCRATCH_REG, ins->inst_offset);
2645                                         ia64_add (code, GP_SCRATCH_REG, GP_SCRATCH_REG, ins->inst_basereg);
2646                                 }
2647                                 ins->inst_basereg = GP_SCRATCH_REG;
2648                         }
2649                         ia64_ldfd (code, ins->dreg, ins->inst_basereg);
2650                         break;
2651                 case OP_LOADR4_MEMBASE:
2652                         ia64_ldfs (code, ins->dreg, ins->inst_basereg);
2653                         ia64_fnorm_d_sf (code, ins->dreg, ins->dreg, 0);
2654                         break;
2655                 case OP_ICONV_TO_R4:
2656                 case OP_LCONV_TO_R4:
2657                         ia64_setf_sig (code, ins->dreg, ins->sreg1);
2658                         ia64_fcvt_xf (code, ins->dreg, ins->dreg);
2659                         ia64_fnorm_s_sf (code, ins->dreg, ins->dreg, 0);
2660                         break;
2661                 case OP_ICONV_TO_R8:
2662                 case OP_LCONV_TO_R8:
2663                         ia64_setf_sig (code, ins->dreg, ins->sreg1);
2664                         ia64_fcvt_xf (code, ins->dreg, ins->dreg);
2665                         ia64_fnorm_d_sf (code, ins->dreg, ins->dreg, 0);
2666                         break;
2667                 case OP_FCONV_TO_R4:
2668                         ia64_fnorm_s_sf (code, ins->dreg, ins->sreg1, 0);
2669                         break;
2670                 case OP_FCONV_TO_I8:
2671                 case OP_FCONV_TO_I:
2672                         ia64_fcvt_fx_trunc_sf (code, FP_SCRATCH_REG, ins->sreg1, 0);
2673                         ia64_getf_sig (code, ins->dreg, FP_SCRATCH_REG);
2674                         break;
2675                 case OP_FADD:
2676                         ia64_fma_d_sf (code, ins->dreg, ins->sreg1, 1, ins->sreg2, 0);
2677                         break;
2678                 case OP_FSUB:
2679                         ia64_fms_d_sf (code, ins->dreg, ins->sreg1, 1, ins->sreg2, 0);
2680                         break;
2681                 case OP_FMUL:
2682                         ia64_fma_d_sf (code, ins->dreg, ins->sreg1, ins->sreg2, 0, 0);
2683                         break;
2684                 case OP_FNEG:
2685                         ia64_fmerge_ns (code, ins->dreg, ins->sreg1, ins->sreg1);
2686                         break;
2687                 case OP_CKFINITE:
2688                         /* Quiet NaN */
2689                         ia64_fclass_m (code, 6, 7, ins->sreg1, 0x080);
2690                         emit_cond_system_exception (cfg, code, "ArithmeticException", 6);
2691                         /* Signaling NaN */
2692                         ia64_fclass_m (code, 6, 7, ins->sreg1, 0x040);
2693                         emit_cond_system_exception (cfg, code, "ArithmeticException", 6);
2694                         /* Positive infinity */
2695                         ia64_fclass_m (code, 6, 7, ins->sreg1, 0x021);
2696                         emit_cond_system_exception (cfg, code, "ArithmeticException", 6);
2697                         /* Negative infinity */
2698                         ia64_fclass_m (code, 6, 7, ins->sreg1, 0x022);
2699                         emit_cond_system_exception (cfg, code, "ArithmeticException", 6);
2700                         break;
2701
2702                 /* Calls */
2703                 case OP_CHECK_THIS:
2704                         /* ensure ins->sreg1 is not NULL */
2705                         ia64_ld8 (code, GP_SCRATCH_REG, ins->sreg1);
2706                         break;
2707                 case OP_ARGLIST:
2708                         ia64_adds_imm (code, GP_SCRATCH_REG, cfg->sig_cookie, cfg->frame_reg);
2709                         ia64_st8 (code, ins->sreg1, GP_SCRATCH_REG);
2710                         break;
2711                 case OP_FCALL:
2712                 case OP_LCALL:
2713                 case OP_VCALL:
2714                 case OP_VCALL2:
2715                 case OP_VOIDCALL:
2716                 case OP_CALL:
2717                         call = (MonoCallInst*)ins;
2718
2719                         if (ins->flags & MONO_INST_HAS_METHOD)
2720                                 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2721                         else
2722                                 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2723
2724                         code = emit_move_return_value (cfg, ins, code);
2725                         break;
2726
2727                 case OP_CALL_REG:
2728                 case OP_FCALL_REG:
2729                 case OP_LCALL_REG:
2730                 case OP_VCALL_REG:
2731                 case OP_VCALL2_REG:
2732                 case OP_VOIDCALL_REG: {
2733                         MonoCallInst *call = (MonoCallInst*)ins;
2734                         CallInfo *cinfo;
2735                         int out_reg;
2736
2737                         /* 
2738                          * mono_arch_find_this_arg () needs to find the this argument in a global 
2739                          * register.
2740                          */
2741                         cinfo = get_call_info (cfg, cfg->mempool, call->signature, FALSE);
2742                         out_reg = cfg->arch.reg_out0;
2743                         if (cinfo->ret.storage == ArgValuetypeAddrInIReg)
2744                                 out_reg ++;
2745                         ia64_mov (code, IA64_R10, out_reg);
2746
2747                         /* Indirect call */
2748                         ia64_mov (code, IA64_R8, ins->sreg1);
2749                         ia64_ld8_inc_imm (code, GP_SCRATCH_REG2, IA64_R8, 8);
2750                         ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG2);
2751                         ia64_ld8 (code, IA64_GP, IA64_R8);
2752                         ia64_br_call_reg (code, IA64_B0, IA64_B6);
2753
2754                         code = emit_move_return_value (cfg, ins, code);
2755                         break;
2756                 }
2757                 case OP_FCALL_MEMBASE:
2758                 case OP_LCALL_MEMBASE:
2759                 case OP_VCALL_MEMBASE:
2760                 case OP_VCALL2_MEMBASE:
2761                 case OP_VOIDCALL_MEMBASE:
2762                 case OP_CALL_MEMBASE: {
2763                         MonoCallInst *call = (MonoCallInst*)ins;
2764                         CallInfo *cinfo;
2765                         int out_reg;
2766
2767                         /* 
2768                          * There are no membase instructions on ia64, but we can't 
2769                          * lower this since get_vcall_slot_addr () needs to decode it.
2770                          */
2771
2772                         /* Keep this in synch with get_vcall_slot_addr */
2773                         ia64_mov (code, IA64_R11, ins->sreg1);
2774                         if (ia64_is_imm14 (ins->inst_offset))
2775                                 ia64_adds_imm (code, IA64_R8, ins->inst_offset, ins->sreg1);
2776                         else {
2777                                 ia64_movl (code, GP_SCRATCH_REG, ins->inst_offset);
2778                                 ia64_add (code, IA64_R8, GP_SCRATCH_REG, ins->sreg1);
2779                         }
2780
2781                         if (call->method && ins->inst_offset < 0) {
2782                                 /* 
2783                                  * This is a possible IMT call so save the IMT method in a global 
2784                                  * register where mono_arch_find_imt_method () and its friends can 
2785                                  * access it.
2786                                  */
2787                                 ia64_movl (code, IA64_R9, call->method);
2788                         }
2789
2790                         /* 
2791                          * mono_arch_find_this_arg () needs to find the this argument in a global 
2792                          * register.
2793                          */
2794                         cinfo = get_call_info (cfg, cfg->mempool, call->signature, FALSE);
2795                         out_reg = cfg->arch.reg_out0;
2796                         if (cinfo->ret.storage == ArgValuetypeAddrInIReg)
2797                                 out_reg ++;
2798                         ia64_mov (code, IA64_R10, out_reg);
2799
2800                         ia64_begin_bundle (code);
2801                         ia64_codegen_set_one_ins_per_bundle (code, TRUE);
2802
2803                         ia64_ld8 (code, GP_SCRATCH_REG, IA64_R8);
2804
2805                         ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG);
2806
2807                         /*
2808                          * This nop will tell get_vcall_slot_addr that this is a virtual 
2809                          * call.
2810                          */
2811                         ia64_nop_i (code, 0x12345);
2812
2813                         ia64_br_call_reg (code, IA64_B0, IA64_B6);
2814
2815                         ia64_codegen_set_one_ins_per_bundle (code, FALSE);
2816
2817                         code = emit_move_return_value (cfg, ins, code);
2818                         break;
2819                 }
2820                 case OP_JMP: {
2821                         /*
2822                          * Keep in sync with the code in emit_epilog.
2823                          */
2824
2825                         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2826                                 NOT_IMPLEMENTED;
2827
2828                         g_assert (!cfg->method->save_lmf);
2829
2830                         /* Load arguments into their original registers */
2831                         code = emit_load_volatile_arguments (cfg, code);
2832
2833                         if (cfg->arch.stack_alloc_size) {
2834                                 if (cfg->arch.omit_fp) {
2835                                         if (ia64_is_imm14 (cfg->arch.stack_alloc_size))
2836                                                 ia64_adds_imm (code, IA64_SP, (cfg->arch.stack_alloc_size), IA64_SP);
2837                                         else {
2838                                                 ia64_movl (code, GP_SCRATCH_REG, cfg->arch.stack_alloc_size);
2839                                                 ia64_add (code, IA64_SP, GP_SCRATCH_REG, IA64_SP);
2840                                         }
2841                                 }
2842                                 else
2843                                         ia64_mov (code, IA64_SP, cfg->arch.reg_saved_sp);
2844                         }
2845                         ia64_mov_to_ar_i (code, IA64_PFS, cfg->arch.reg_saved_ar_pfs);
2846                         ia64_mov_ret_to_br (code, IA64_B0, cfg->arch.reg_saved_b0);
2847
2848                         add_patch_info (cfg, code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2849                         ia64_movl (code, GP_SCRATCH_REG, 0);
2850                         ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG);
2851                         ia64_br_cond_reg (code, IA64_B6);
2852
2853                         break;
2854                 }
2855                 case OP_BREAK:
2856                         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, mono_break);
2857                         break;
2858
2859                 case OP_LOCALLOC: {
2860                         gint32 abi_offset;
2861
2862                         /* FIXME: Sigaltstack support */
2863
2864                         /* keep alignment */
2865                         ia64_adds_imm (code, GP_SCRATCH_REG, MONO_ARCH_LOCALLOC_ALIGNMENT - 1, ins->sreg1);
2866                         ia64_movl (code, GP_SCRATCH_REG2, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2867                         ia64_and (code, GP_SCRATCH_REG, GP_SCRATCH_REG, GP_SCRATCH_REG2);
2868
2869                         ia64_sub (code, IA64_SP, IA64_SP, GP_SCRATCH_REG);
2870
2871                         ia64_mov (code, ins->dreg, IA64_SP);
2872
2873                         /* An area at sp is reserved by the ABI for parameter passing */
2874                         abi_offset = - ALIGN_TO (cfg->param_area + 16, MONO_ARCH_LOCALLOC_ALIGNMENT);
2875                         if (ia64_is_adds_imm (abi_offset))
2876                                 ia64_adds_imm (code, IA64_SP, abi_offset, IA64_SP);
2877                         else {
2878                                 ia64_movl (code, GP_SCRATCH_REG2, abi_offset);
2879                                 ia64_add (code, IA64_SP, IA64_SP, GP_SCRATCH_REG2);
2880                         }
2881
2882                         if (ins->flags & MONO_INST_INIT) {
2883                                 /* Upper limit */
2884                                 ia64_add (code, GP_SCRATCH_REG2, ins->dreg, GP_SCRATCH_REG);
2885
2886                                 ia64_codegen_set_one_ins_per_bundle (code, TRUE);
2887
2888                                 /* Init loop */
2889                                 ia64_st8_inc_imm_hint (code, ins->dreg, IA64_R0, 8, 0);
2890                                 ia64_cmp_lt (code, 8, 9, ins->dreg, GP_SCRATCH_REG2);
2891                                 ia64_br_cond_pred (code, 8, -2);
2892
2893                                 ia64_codegen_set_one_ins_per_bundle (code, FALSE);
2894
2895                                 ia64_sub (code, ins->dreg, GP_SCRATCH_REG2, GP_SCRATCH_REG);
2896                         }
2897
2898                         break;
2899                 }
2900                 case OP_LOCALLOC_IMM: {
2901                         gint32 abi_offset;
2902
2903                         /* FIXME: Sigaltstack support */
2904
2905                         gssize size = ins->inst_imm;
2906                         size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
2907
2908                         if (ia64_is_adds_imm (size))
2909                                 ia64_adds_imm (code, GP_SCRATCH_REG, size, IA64_R0);
2910                         else
2911                                 ia64_movl (code, GP_SCRATCH_REG, size);
2912
2913                         ia64_sub (code, IA64_SP, IA64_SP, GP_SCRATCH_REG);
2914                         ia64_mov (code, ins->dreg, IA64_SP);
2915
2916                         /* An area at sp is reserved by the ABI for parameter passing */
2917                         abi_offset = - ALIGN_TO (cfg->param_area + 16, MONO_ARCH_FRAME_ALIGNMENT);
2918                         if (ia64_is_adds_imm (abi_offset))
2919                                 ia64_adds_imm (code, IA64_SP, abi_offset, IA64_SP);
2920                         else {
2921                                 ia64_movl (code, GP_SCRATCH_REG2, abi_offset);
2922                                 ia64_add (code, IA64_SP, IA64_SP, GP_SCRATCH_REG2);
2923                         }
2924
2925                         if (ins->flags & MONO_INST_INIT) {
2926                                 /* Upper limit */
2927                                 ia64_add (code, GP_SCRATCH_REG2, ins->dreg, GP_SCRATCH_REG);
2928
2929                                 ia64_codegen_set_one_ins_per_bundle (code, TRUE);
2930
2931                                 /* Init loop */
2932                                 ia64_st8_inc_imm_hint (code, ins->dreg, IA64_R0, 8, 0);
2933                                 ia64_cmp_lt (code, 8, 9, ins->dreg, GP_SCRATCH_REG2);
2934                                 ia64_br_cond_pred (code, 8, -2);
2935
2936                                 ia64_codegen_set_one_ins_per_bundle (code, FALSE);
2937
2938                                 ia64_sub (code, ins->dreg, GP_SCRATCH_REG2, GP_SCRATCH_REG);
2939                         }
2940
2941                         break;
2942                 }
2943                 case OP_TLS_GET:
2944                         ia64_adds_imm (code, ins->dreg, ins->inst_offset, IA64_TP);
2945                         ia64_ld8 (code, ins->dreg, ins->dreg);
2946                         break;
2947
2948                         /* Synchronization */
2949                 case OP_MEMORY_BARRIER:
2950                         ia64_mf (code);
2951                         break;
2952                 case OP_ATOMIC_ADD_IMM_NEW_I4:
2953                         g_assert (ins->inst_offset == 0);
2954                         ia64_fetchadd4_acq_hint (code, ins->dreg, ins->inst_basereg, ins->inst_imm, 0);
2955                         ia64_adds_imm (code, ins->dreg, ins->inst_imm, ins->dreg);
2956                         break;
2957                 case OP_ATOMIC_ADD_IMM_NEW_I8:
2958                         g_assert (ins->inst_offset == 0);
2959                         ia64_fetchadd8_acq_hint (code, ins->dreg, ins->inst_basereg, ins->inst_imm, 0);
2960                         ia64_adds_imm (code, ins->dreg, ins->inst_imm, ins->dreg);
2961                         break;
2962                 case OP_ATOMIC_EXCHANGE_I4:
2963                         ia64_xchg4_hint (code, ins->dreg, ins->inst_basereg, ins->sreg2, 0);
2964                         ia64_sxt4 (code, ins->dreg, ins->dreg);
2965                         break;
2966                 case OP_ATOMIC_EXCHANGE_I8:
2967                         ia64_xchg8_hint (code, ins->dreg, ins->inst_basereg, ins->sreg2, 0);
2968                         break;
2969                 case OP_ATOMIC_ADD_NEW_I4: {
2970                         guint8 *label, *buf;
2971
2972                         /* From libatomic_ops */
2973                         ia64_mf (code);
2974
2975                         ia64_begin_bundle (code);
2976                         label = code.buf + code.nins;
2977                         ia64_ld4_acq (code, GP_SCRATCH_REG, ins->sreg1);
2978                         ia64_add (code, GP_SCRATCH_REG2, GP_SCRATCH_REG, ins->sreg2);
2979                         ia64_mov_to_ar_m (code, IA64_CCV, GP_SCRATCH_REG);
2980                         ia64_cmpxchg4_acq_hint (code, GP_SCRATCH_REG2, ins->sreg1, GP_SCRATCH_REG2, 0);
2981                         ia64_cmp4_eq (code, 6, 7, GP_SCRATCH_REG, GP_SCRATCH_REG2);
2982                         buf = code.buf + code.nins;
2983                         ia64_br_cond_pred (code, 7, 0);
2984                         ia64_begin_bundle (code);
2985                         ia64_patch (buf, label);
2986                         ia64_add (code, ins->dreg, GP_SCRATCH_REG, ins->sreg2);
2987                         break;
2988                 }
2989                 case OP_ATOMIC_ADD_NEW_I8: {
2990                         guint8 *label, *buf;
2991
2992                         /* From libatomic_ops */
2993                         ia64_mf (code);
2994
2995                         ia64_begin_bundle (code);
2996                         label = code.buf + code.nins;
2997                         ia64_ld8_acq (code, GP_SCRATCH_REG, ins->sreg1);
2998                         ia64_add (code, GP_SCRATCH_REG2, GP_SCRATCH_REG, ins->sreg2);
2999                         ia64_mov_to_ar_m (code, IA64_CCV, GP_SCRATCH_REG);
3000                         ia64_cmpxchg8_acq_hint (code, GP_SCRATCH_REG2, ins->sreg1, GP_SCRATCH_REG2, 0);
3001                         ia64_cmp_eq (code, 6, 7, GP_SCRATCH_REG, GP_SCRATCH_REG2);
3002                         buf = code.buf + code.nins;
3003                         ia64_br_cond_pred (code, 7, 0);
3004                         ia64_begin_bundle (code);
3005                         ia64_patch (buf, label);
3006                         ia64_add (code, ins->dreg, GP_SCRATCH_REG, ins->sreg2);
3007                         break;
3008                 }
3009
3010                         /* Exception handling */
3011                 case OP_CALL_HANDLER:
3012                         /*
3013                          * Using a call instruction would mess up the register stack, so
3014                          * save the return address to a register and use a
3015                          * branch.
3016                          */
3017                         ia64_codegen_set_one_ins_per_bundle (code, TRUE);
3018                         ia64_mov (code, IA64_R15, IA64_R0);
3019                         ia64_mov_from_ip (code, GP_SCRATCH_REG);
3020                         /* Add the length of OP_CALL_HANDLER */
3021                         ia64_adds_imm (code, GP_SCRATCH_REG, 5 * 16, GP_SCRATCH_REG);
3022                         add_patch_info (cfg, code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
3023                         ia64_movl (code, GP_SCRATCH_REG2, 0);
3024                         ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG2);
3025                         ia64_br_cond_reg (code, IA64_B6);
3026                         ia64_codegen_set_one_ins_per_bundle (code, FALSE);
3027                         break;
3028                 case OP_START_HANDLER: {
3029                         /*
3030                          * We receive the return address in GP_SCRATCH_REG.
3031                          */
3032                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3033
3034                         /* 
3035                          * R15 determines our caller. It is used since it is writable using
3036                          * libunwind.
3037                          * R15 == 0 means we are called by OP_CALL_HANDLER or via resume_context ()
3038                          * R15 != 0 means we are called by call_filter ().
3039                          */
3040                         ia64_codegen_set_one_ins_per_bundle (code, TRUE);
3041                         ia64_cmp_eq (code, 6, 7, IA64_R15, IA64_R0);
3042
3043                         ia64_br_cond_pred (code, 6, 6);
3044
3045                         /*
3046                          * Called by call_filter:
3047                          * Allocate a new stack frame, and set the fp register from the 
3048                          * value passed in by the caller.
3049                          * We allocate a similar frame as is done by the prolog, so
3050                          * if an exception is thrown while executing the filter, the
3051                          * unwinder can unwind through the filter frame using the unwind
3052                          * info for the prolog. 
3053                          */
3054                         ia64_alloc (code, cfg->arch.reg_saved_ar_pfs, cfg->arch.reg_local0 - cfg->arch.reg_in0, cfg->arch.reg_out0 - cfg->arch.reg_local0, cfg->arch.n_out_regs, 0);
3055                         ia64_mov_from_br (code, cfg->arch.reg_saved_b0, IA64_B0);
3056                         ia64_mov (code, cfg->arch.reg_saved_sp, IA64_SP);
3057                         ia64_mov (code, cfg->frame_reg, IA64_R15);
3058                         /* Signal to endfilter that we are called by call_filter */
3059                         ia64_mov (code, GP_SCRATCH_REG, IA64_R0);
3060
3061                         /* Branch target: */
3062                         if (ia64_is_imm14 (spvar->inst_offset)) 
3063                                 ia64_adds_imm (code, GP_SCRATCH_REG2, spvar->inst_offset, cfg->frame_reg);
3064                         else {
3065                                 ia64_movl (code, GP_SCRATCH_REG2, spvar->inst_offset);
3066                                 ia64_add (code, GP_SCRATCH_REG2, cfg->frame_reg, GP_SCRATCH_REG2);
3067                         }
3068
3069                         /* Save the return address */                           
3070                         ia64_st8_hint (code, GP_SCRATCH_REG2, GP_SCRATCH_REG, 0);
3071                         ia64_codegen_set_one_ins_per_bundle (code, FALSE);
3072
3073                         break;
3074                 }
3075                 case OP_ENDFINALLY:
3076                 case OP_ENDFILTER: {
3077                         /* FIXME: Return the value in ENDFILTER */
3078                         MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
3079
3080                         /* Load the return address */
3081                         if (ia64_is_imm14 (spvar->inst_offset)) {
3082                                 ia64_adds_imm (code, GP_SCRATCH_REG, spvar->inst_offset, cfg->frame_reg);
3083                         } else {
3084                                 ia64_movl (code, GP_SCRATCH_REG, spvar->inst_offset);
3085                                 ia64_add (code, GP_SCRATCH_REG, cfg->frame_reg, GP_SCRATCH_REG);
3086                         }
3087                         ia64_ld8_hint (code, GP_SCRATCH_REG, GP_SCRATCH_REG, 0);
3088
3089                         /* Test caller */
3090                         ia64_cmp_eq (code, 6, 7, GP_SCRATCH_REG, IA64_R0);
3091                         ia64_br_cond_pred (code, 7, 4);
3092
3093                         /* Called by call_filter */
3094                         /* Pop frame */
3095                         ia64_mov_to_ar_i (code, IA64_PFS, cfg->arch.reg_saved_ar_pfs);
3096                         ia64_mov_to_br (code, IA64_B0, cfg->arch.reg_saved_b0);
3097                         ia64_br_ret_reg (code, IA64_B0);                        
3098
3099                         /* Called by CALL_HANDLER */
3100                         ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG);
3101                         ia64_br_cond_reg (code, IA64_B6);
3102                         break;
3103                 }
3104                 case OP_THROW:
3105                         ia64_mov (code, cfg->arch.reg_out0, ins->sreg1);
3106                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3107                                                           (gpointer)"mono_arch_throw_exception");
3108
3109                         /* 
3110                          * This might be the last instruction in the method, so add a dummy
3111                          * instruction so the unwinder will work.
3112                          */
3113                         ia64_break_i (code, 0);
3114                         break;
3115                 case OP_RETHROW:
3116                         ia64_mov (code, cfg->arch.reg_out0, ins->sreg1);
3117                         code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, 
3118                                                           (gpointer)"mono_arch_rethrow_exception");
3119
3120                         ia64_break_i (code, 0);
3121                         break;
3122
3123                 default:
3124                         g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
3125                         g_assert_not_reached ();
3126                 }
3127
3128                 if ((code.buf - cfg->native_code - offset) > max_len) {
3129                         g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
3130                                    mono_inst_name (ins->opcode), max_len, code.buf - cfg->native_code - offset);
3131                         g_assert_not_reached ();
3132                 }
3133                
3134                 cpos += max_len;
3135
3136                 last_ins = ins;
3137                 last_offset = offset;
3138         }
3139
3140         ia64_codegen_close (code);
3141
3142         cfg->code_len = code.buf - cfg->native_code;
3143 }
3144
/*
 * mono_arch_register_lowlevel_calls:
 *
 *   Takes no arguments and returns nothing. The body is empty, so this
 * backend registers nothing here.
 * NOTE(review): presumably an architecture hook invoked by the generic JIT
 * code -- confirm against the caller.
 */
3145 void
3146 mono_arch_register_lowlevel_calls (void)
3147 {
3148 }
3149
/*
 * Instruction type held by each of the three slots of a bundle; presumably
 * indexed as [template][slot], like last_stop_in_template below.
 * Several rows list the same slot types as their neighbours; those templates
 * differ only in their stops_in_template entries.
 * The all-zero rows (0x06, 0x07, 0x14, 0x15, 0x1a, 0x1b, 0x1e, 0x1f) are
 * presumably the reserved template encodings -- TODO confirm against the
 * Itanium bundle template table.
 */
3150 static Ia64InsType ins_types_in_template [32][3] = {
3151         {IA64_INS_TYPE_M, IA64_INS_TYPE_I, IA64_INS_TYPE_I}, /* 0x00 */
3152         {IA64_INS_TYPE_M, IA64_INS_TYPE_I, IA64_INS_TYPE_I}, /* 0x01 */
3153         {IA64_INS_TYPE_M, IA64_INS_TYPE_I, IA64_INS_TYPE_I}, /* 0x02 */
3154         {IA64_INS_TYPE_M, IA64_INS_TYPE_I, IA64_INS_TYPE_I}, /* 0x03 */
3155         {IA64_INS_TYPE_M, IA64_INS_TYPE_LX, IA64_INS_TYPE_LX}, /* 0x04 */
3156         {IA64_INS_TYPE_M, IA64_INS_TYPE_LX, IA64_INS_TYPE_LX}, /* 0x05 */
3157         {0, 0, 0}, /* 0x06 */
3158         {0, 0, 0}, /* 0x07 */
3159         {IA64_INS_TYPE_M, IA64_INS_TYPE_M, IA64_INS_TYPE_I}, /* 0x08 */
3160         {IA64_INS_TYPE_M, IA64_INS_TYPE_M, IA64_INS_TYPE_I}, /* 0x09 */
3161         {IA64_INS_TYPE_M, IA64_INS_TYPE_M, IA64_INS_TYPE_I}, /* 0x0a */
3162         {IA64_INS_TYPE_M, IA64_INS_TYPE_M, IA64_INS_TYPE_I}, /* 0x0b */
3163         {IA64_INS_TYPE_M, IA64_INS_TYPE_F, IA64_INS_TYPE_I}, /* 0x0c */
3164         {IA64_INS_TYPE_M, IA64_INS_TYPE_F, IA64_INS_TYPE_I}, /* 0x0d */
3165         {IA64_INS_TYPE_M, IA64_INS_TYPE_M, IA64_INS_TYPE_F}, /* 0x0e */
3166         {IA64_INS_TYPE_M, IA64_INS_TYPE_M, IA64_INS_TYPE_F}, /* 0x0f */
3167         {IA64_INS_TYPE_M, IA64_INS_TYPE_I, IA64_INS_TYPE_B}, /* 0x10 */
3168         {IA64_INS_TYPE_M, IA64_INS_TYPE_I, IA64_INS_TYPE_B}, /* 0x11 */
3169         {IA64_INS_TYPE_M, IA64_INS_TYPE_B, IA64_INS_TYPE_B}, /* 0x12 */
3170         {IA64_INS_TYPE_M, IA64_INS_TYPE_B, IA64_INS_TYPE_B}, /* 0x13 */
3171         {0, 0, 0}, /* 0x14 */
3172         {0, 0, 0}, /* 0x15 */
3173         {IA64_INS_TYPE_B, IA64_INS_TYPE_B, IA64_INS_TYPE_B}, /* 0x16 */
3174         {IA64_INS_TYPE_B, IA64_INS_TYPE_B, IA64_INS_TYPE_B}, /* 0x17 */
3175         {IA64_INS_TYPE_M, IA64_INS_TYPE_M, IA64_INS_TYPE_B}, /* 0x18 */
3176         {IA64_INS_TYPE_M, IA64_INS_TYPE_M, IA64_INS_TYPE_B}, /* 0x19 */
3177         {0, 0, 0}, /* 0x1a */
3178         {0, 0, 0}, /* 0x1b */
3179         {IA64_INS_TYPE_M, IA64_INS_TYPE_F, IA64_INS_TYPE_B}, /* 0x1c */
3180         {IA64_INS_TYPE_M, IA64_INS_TYPE_F, IA64_INS_TYPE_B}, /* 0x1d */
3181         {0, 0, 0}, /* 0x1e */
3182         {0, 0, 0} /* 0x1f */
3183 };
3184
/*
 * Per-template stop positions: entry [template][slot] is TRUE when the
 * template places a stop after that slot. The highest TRUE slot of each row
 * equals the corresponding last_stop_in_template entry (-1 there when the row
 * is all FALSE).
 */
3185 static gboolean stops_in_template [32][3] = {
3186         { FALSE, FALSE, FALSE }, /* 0x00 */
3187         { FALSE, FALSE, TRUE }, /* 0x01 */
3188         { FALSE, TRUE, FALSE }, /* 0x02 */
3189         { FALSE, TRUE, TRUE }, /* 0x03 */
3190         { FALSE, FALSE, FALSE }, /* 0x04 */
3191         { FALSE, FALSE, TRUE }, /* 0x05 */
3192         { FALSE, FALSE, FALSE }, /* 0x06 */
3193         { FALSE, FALSE, FALSE }, /* 0x07 */
3194
3195         { FALSE, FALSE, FALSE }, /* 0x08 */
3196         { FALSE, FALSE, TRUE }, /* 0x09 */
3197         { TRUE, FALSE, FALSE }, /* 0x0a */
3198         { TRUE, FALSE, TRUE }, /* 0x0b */
3199         { FALSE, FALSE, FALSE }, /* 0x0c */
3200         { FALSE, FALSE, TRUE }, /* 0x0d */
3201         { FALSE, FALSE, FALSE }, /* 0x0e */
3202         { FALSE, FALSE, TRUE }, /* 0x0f */
3203
3204         { FALSE, FALSE, FALSE }, /* 0x10 */
3205         { FALSE, FALSE, TRUE }, /* 0x11 */
3206         { FALSE, FALSE, FALSE }, /* 0x12 */
3207         { FALSE, FALSE, TRUE }, /* 0x13 */
3208         { FALSE, FALSE, FALSE }, /* 0x14 */
3209         { FALSE, FALSE, FALSE }, /* 0x15 */
3210         { FALSE, FALSE, FALSE }, /* 0x16 */
3211         { FALSE, FALSE, TRUE }, /* 0x17 */
3212
3213         { FALSE, FALSE, FALSE }, /* 0x18 */
3214         { FALSE, FALSE, TRUE }, /* 0x19 */
3215         { FALSE, FALSE, FALSE }, /* 0x1a */
3216         { FALSE, FALSE, FALSE }, /* 0x1b */
3217         { FALSE, FALSE, FALSE }, /* 0x1c */
3218         { FALSE, FALSE, TRUE }, /* 0x1d */
3219         { FALSE, FALSE, FALSE }, /* 0x1e */
3220         { FALSE, FALSE, FALSE } /* 0x1f */
3221 };
3222
/*
 * For each template, the index (0..2) of the last slot that is followed by a
 * stop, or -1 when the template contains no stop. Eight templates per row,
 * starting at 0x00, 0x08, 0x10 and 0x18.
 */
3223 static int last_stop_in_template [32] = {
3224         -1, 2, 1, 2, -1, 2, -1, -1,
3225         -1, 2, 0, 2, -1, 2, -1, 2,
3226         -1, 2, -1, 2, -1, -1, -1, 2,
3227         -1, 2, -1, -1, -1, 2, -1, -1
3228 };
3229
/*
 * Nop encodings, one per instruction type.
 * NOTE(review): presumably indexed by the Ia64InsType value, which would make
 * the first two entries (both IA64_NOP_I) the A and I types -- confirm against
 * the enum declaration.
 */
3230 static guint64 nops_for_ins_types [6] = {
3231         IA64_NOP_I,
3232         IA64_NOP_I,
3233         IA64_NOP_M,
3234         IA64_NOP_F,
3235         IA64_NOP_B,
3236         IA64_NOP_X
3237 };
3238
/*
 * TRUE when ITYPE1 equals ITYPE2, or when ITYPE2 is IA64_INS_TYPE_A and ITYPE1
 * is IA64_INS_TYPE_I or IA64_INS_TYPE_M. Both arguments are evaluated more than
 * once, so they must be free of side effects.
 */
3239 #define ITYPE_MATCH(itype1, itype2) (((itype1) == (itype2)) || (((itype2) == IA64_INS_TYPE_A) && (((itype1) == IA64_INS_TYPE_I) || ((itype1) == IA64_INS_TYPE_M))))
3240
3241 /* 
3242  * Debugging support
3243  */
3244
/*
 * DEBUG_INS_SCHED (a) executes statement A when the condition below is changed
 * to '#if 1'; as written it expands to nothing, so the tracing code wrapped in
 * it is compiled out.
 */
3245 #if 0
3246 #define DEBUG_INS_SCHED(a) do { a; } while (0)
3247 #else
3248 #define DEBUG_INS_SCHED(a)
3249 #endif
3250
3251 static void
3252 ia64_analyze_deps (Ia64CodegenState *code, int *deps_start, int *stops)
3253 {
3254         int i, pos, ins_index, current_deps_start, current_ins_start, reg;
3255         guint8 *deps = code->dep_info;
3256         gboolean need_stop, no_stop;
3257
3258         for (i = 0; i < code->nins; ++i)
3259                 stops [i] = FALSE;
3260         
3261         ins_index = 0;
3262         current_deps_start = 0;
3263         current_ins_start = 0;
3264         deps_start [ins_index] = current_ins_start;
3265         pos = 0;
3266         no_stop = FALSE;
3267         DEBUG_INS_SCHED (printf ("BEGIN.\n"));
3268         while (pos < code->dep_info_pos) {
3269                 need_stop = FALSE;
3270                 switch (deps [pos]) {
3271                 case IA64_END_OF_INS:
3272                         ins_index ++;
3273                         current_ins_start = pos + 2;
3274                         deps_start [ins_index] = current_ins_start;
3275                         no_stop = FALSE;
3276                         DEBUG_INS_SCHED (printf ("(%d) END INS.\n", ins_index - 1));
3277                         break;
3278                 case IA64_NONE:
3279                         break;
3280                 case IA64_READ_GR:
3281                         reg = deps [pos + 1];
3282
3283                         DEBUG_INS_SCHED (printf ("READ GR: %d\n", reg));
3284                         for (i = current_deps_start; i < current_ins_start; i += 2)
3285                                 if (deps [i] == IA64_WRITE_GR && deps [i + 1] == reg)
3286                                         need_stop = TRUE;
3287                         break;
3288                 case IA64_WRITE_GR:
3289                         reg = code->dep_info [pos + 1];
3290
3291                         DEBUG_INS_SCHED (printf ("WRITE GR: %d\n", reg));
3292                         for (i = current_deps_start; i < current_ins_start; i += 2)
3293                                 if (deps [i] == IA64_WRITE_GR && deps [i + 1] == reg)
3294                                         need_stop = TRUE;
3295                         break;
3296                 case IA64_READ_PR:
3297                         reg = deps [pos + 1];
3298
3299                         DEBUG_INS_SCHED (printf ("READ PR: %d\n", reg));
3300                         for (i = current_deps_start; i < current_ins_start; i += 2)
3301                                 if (((deps [i] == IA64_WRITE_PR) || (deps [i] == IA64_WRITE_PR_FLOAT)) && deps [i + 1] == reg)
3302                                         need_stop = TRUE;
3303                         break;
3304                 case IA64_READ_PR_BRANCH:
3305                         reg = deps [pos + 1];
3306
3307                         /* Writes to prs by non-float instructions are visible to branches */
3308                         DEBUG_INS_SCHED (printf ("READ PR BRANCH: %d\n", reg));
3309                         for (i = current_deps_start; i < current_ins_start; i += 2)
3310                                 if (deps [i] == IA64_WRITE_PR_FLOAT && deps [i + 1] == reg)
3311                                         need_stop = TRUE;
3312                         break;
3313                 case IA64_WRITE_PR:
3314                         reg = code->dep_info [pos + 1];
3315
3316                         DEBUG_INS_SCHED (printf ("WRITE PR: %d\n", reg));
3317                         for (i = current_deps_start; i < current_ins_start; i += 2)
3318                                 if (((deps [i] == IA64_WRITE_PR) || (deps [i] == IA64_WRITE_PR_FLOAT)) && deps [i + 1] == reg)
3319                                         need_stop = TRUE;
3320                         break;
3321                 case IA64_WRITE_PR_FLOAT:
3322                         reg = code->dep_info [pos + 1];
3323
3324                         DEBUG_INS_SCHED (printf ("WRITE PR FP: %d\n", reg));
3325                         for (i = current_deps_start; i < current_ins_start; i += 2)
3326                                 if (((deps [i] == IA64_WRITE_GR) || (deps [i] == IA64_WRITE_PR_FLOAT)) && deps [i + 1] == reg)
3327                                         need_stop = TRUE;
3328                         break;
3329                 case IA64_READ_BR:
3330                         reg = deps [pos + 1];
3331
3332                         DEBUG_INS_SCHED (printf ("READ BR: %d\n", reg));
3333                         for (i = current_deps_start; i < current_ins_start; i += 2)
3334                                 if (deps [i] == IA64_WRITE_BR && deps [i + 1] == reg)
3335                                         need_stop = TRUE;
3336                         break;
3337                 case IA64_WRITE_BR:
3338                         reg = code->dep_info [pos + 1];
3339
3340                         DEBUG_INS_SCHED (printf ("WRITE BR: %d\n", reg));
3341                         for (i = current_deps_start; i < current_ins_start; i += 2)
3342                                 if (deps [i] == IA64_WRITE_BR && deps [i + 1] == reg)
3343                                         need_stop = TRUE;
3344                         break;
3345                 case IA64_READ_BR_BRANCH:
3346                         reg = deps [pos + 1];
3347
3348                         /* Writes to brs are visible to branches */
3349                         DEBUG_INS_SCHED (printf ("READ BR BRACH: %d\n", reg));
3350                         break;
3351                 case IA64_READ_FR:
3352                         reg = deps [pos + 1];
3353
3354                         DEBUG_INS_SCHED (printf ("READ BR: %d\n", reg));
3355                         for (i = current_deps_start; i < current_ins_start; i += 2)
3356                                 if (deps [i] == IA64_WRITE_FR && deps [i + 1] == reg)
3357                                         need_stop = TRUE;
3358                         break;
3359                 case IA64_WRITE_FR:
3360                         reg = code->dep_info [pos + 1];
3361
3362                         DEBUG_INS_SCHED (printf ("WRITE BR: %d\n", reg));
3363                         for (i = current_deps_start; i < current_ins_start; i += 2)
3364                                 if (deps [i] == IA64_WRITE_FR && deps [i + 1] == reg)
3365                                         need_stop = TRUE;
3366                         break;
3367                 case IA64_READ_AR:
3368                         reg = deps [pos + 1];
3369
3370                         DEBUG_INS_SCHED (printf ("READ AR: %d\n", reg));
3371                         for (i = current_deps_start; i < current_ins_start; i += 2)
3372                                 if (deps [i] == IA64_WRITE_AR && deps [i + 1] == reg)
3373                                         need_stop = TRUE;
3374                         break;
3375                 case IA64_WRITE_AR:
3376                         reg = code->dep_info [pos + 1];
3377
3378                         DEBUG_INS_SCHED (printf ("WRITE AR: %d\n", reg));
3379                         for (i = current_deps_start; i < current_ins_start; i += 2)
3380                                 if (deps [i] == IA64_WRITE_AR && deps [i + 1] == reg)
3381                                         need_stop = TRUE;
3382                         break;
3383                 case IA64_NO_STOP:
3384                         /* 
3385                          * Explicitly indicate that a stop is not required. Useful for
3386                          * example when two predicated instructions with negated predicates
3387                          * write the same registers.
3388                          */
3389                         no_stop = TRUE;
3390                         break;
3391                 default:
3392                         g_assert_not_reached ();
3393                 }
3394                 pos += 2;
3395
3396                 if (need_stop && !no_stop) {
3397                         g_assert (ins_index > 0);
3398                         stops [ins_index - 1] = 1;
3399
3400                         DEBUG_INS_SCHED (printf ("STOP\n"));
3401                         current_deps_start = current_ins_start;
3402
3403                         /* Skip remaining deps for this instruction */
3404                         while (deps [pos] != IA64_END_OF_INS)
3405                                 pos += 2;
3406                 }
3407         }
3408
3409         if (code->nins > 0) {
3410                 /* No dependency info for the last instruction */
3411                 stops [code->nins - 1] = 1;
3412         }
3413
3414         deps_start [code->nins] = code->dep_info_pos;
3415 }
3416
3417 static void
3418 ia64_real_emit_bundle (Ia64CodegenState *code, int *deps_start, int *stops, int n, guint64 template, guint64 ins1, guint64 ins2, guint64 ins3, guint8 nops)
3419 {
3420         int stop_pos, i, deps_to_shift, dep_shift;
3421
3422         g_assert (n <= code->nins);
3423
3424         // if (n > 1) printf ("FOUND: %ld.\n", template);
3425
3426         ia64_emit_bundle_template (code, template, ins1, ins2, ins3);
3427
3428         stop_pos = last_stop_in_template [template] + 1;
3429         if (stop_pos > n)
3430                 stop_pos = n;
3431
3432         /* Compute the number of 'real' instructions before the stop */
3433         deps_to_shift = stop_pos;
3434         if (stop_pos >= 3 && (nops & (1 << 2)))
3435                 deps_to_shift --;
3436         if (stop_pos >= 2 && (nops & (1 << 1)))
3437                 deps_to_shift --;
3438         if (stop_pos >= 1 && (nops & (1 << 0)))
3439                 deps_to_shift --;
3440
3441         /* 
3442          * We have to keep some dependencies whose instructions have been shifted
3443          * out of the buffer. So nullify the end_of_ins markers in the dependency
3444          * array.
3445          */
3446         for (i = deps_start [deps_to_shift]; i < deps_start [n]; i += 2)
3447                 if (code->dep_info [i] == IA64_END_OF_INS)
3448                         code->dep_info [i] = IA64_NONE;
3449
3450         g_assert (deps_start [deps_to_shift] <= code->dep_info_pos);
3451         memcpy (code->dep_info, &code->dep_info [deps_start [deps_to_shift]], code->dep_info_pos - deps_start [deps_to_shift]);
3452         code->dep_info_pos = code->dep_info_pos - deps_start [deps_to_shift];
3453
3454         dep_shift = deps_start [deps_to_shift];
3455         for (i = 0; i < code->nins + 1 - n; ++i)
3456                 deps_start [i] = deps_start [n + i] - dep_shift;
3457
3458         /* Determine the exact positions of instructions with unwind ops */
3459         if (code->unw_op_count) {
3460                 int ins_pos [16];
3461                 int curr_ins, curr_ins_pos;
3462
3463                 curr_ins = 0;
3464                 curr_ins_pos = ((code->buf - code->region_start - 16) / 16) * 3;
3465                 for (i = 0; i < 3; ++i) {
3466                         if (! (nops & (1 << i))) {
3467                                 ins_pos [curr_ins] = curr_ins_pos + i;
3468                                 curr_ins ++;
3469                         }
3470                 }
3471
3472                 for (i = code->unw_op_pos; i < code->unw_op_count; ++i) {
3473                         if (code->unw_ops_pos [i] < n) {
3474                                 code->unw_ops [i].when = ins_pos [code->unw_ops_pos [i]];
3475                                 //printf ("UNW-OP: %d -> %d\n", code->unw_ops_pos [i], code->unw_ops [i].when);
3476                         }
3477                 }
3478                 if (code->unw_op_pos < code->unw_op_count)
3479                         code->unw_op_pos += n;
3480         }
3481
3482         if (n == code->nins) {
3483                 code->template = 0;
3484                 code->nins = 0;
3485         }               
3486         else {
3487                 memcpy (&code->instructions [0], &code->instructions [n], (code->nins - n) * sizeof (guint64));
3488                 memcpy (&code->itypes [0], &code->itypes [n], (code->nins - n) * sizeof (int));
3489                 memcpy (&stops [0], &stops [n], (code->nins - n) * sizeof (int));
3490                 code->nins -= n;
3491         }
3492 }
3493
3494 void
3495 ia64_emit_bundle (Ia64CodegenState *code, gboolean flush)
3496 {
3497         int i, ins_type, template, nins_to_emit;
3498         int deps_start [16];
3499         int stops [16];
3500         gboolean found;
3501
3502         /*
3503          * We implement a simple scheduler which tries to put three instructions 
3504          * per bundle, then two, then one.
3505          */
3506         ia64_analyze_deps (code, deps_start, stops);
3507
3508         if ((code->nins >= 3) && !code->one_ins_per_bundle) {
3509                 /* Find a suitable template */
3510                 for (template = 0; template < 32; ++template) {
3511                         if (stops_in_template [template][0] != stops [0] ||
3512                                 stops_in_template [template][1] != stops [1] ||
3513                                 stops_in_template [template][2] != stops [2])
3514                                 continue;
3515
3516                         found = TRUE;
3517                         for (i = 0; i < 3; ++i) {
3518                                 ins_type = ins_types_in_template [template][i];
3519                                 switch (code->itypes [i]) {
3520                                 case IA64_INS_TYPE_A:
3521                                         found &= (ins_type == IA64_INS_TYPE_I) || (ins_type == IA64_INS_TYPE_M);
3522                                         break;
3523                                 default:
3524                                         found &= (ins_type == code->itypes [i]);
3525                                         break;
3526                                 }
3527                         }
3528
3529                         if (found)
3530                                 found = debug_ins_sched ();
3531
3532                         if (found) {
3533                                 ia64_real_emit_bundle (code, deps_start, stops, 3, template, code->instructions [0], code->instructions [1], code->instructions [2], 0);
3534                                 break;
3535                         }
3536                 }
3537         }
3538
3539         if (code->nins < IA64_INS_BUFFER_SIZE && !flush)
3540                 /* Wait for more instructions */
3541                 return;
3542
3543         /* If it didn't work out, try putting two instructions into one bundle */
3544         if ((code->nins >= 2) && !code->one_ins_per_bundle) {
3545                 /* Try a nop at the end */
3546                 for (template = 0; template < 32; ++template) {
3547                         if (stops_in_template [template][0] != stops [0] ||
3548                                 ((stops_in_template [template][1] != stops [1]) &&
3549                                  (stops_in_template [template][2] != stops [1])))
3550                                  
3551                                 continue;
3552
3553                         if (!ITYPE_MATCH (ins_types_in_template [template][0], code->itypes [0]) ||
3554                                 !ITYPE_MATCH (ins_types_in_template [template][1], code->itypes [1]))
3555                                 continue;
3556
3557                         if (!debug_ins_sched ())
3558                                 continue;
3559
3560                         ia64_real_emit_bundle (code, deps_start, stops, 2, template, code->instructions [0], code->instructions [1], nops_for_ins_types [ins_types_in_template [template][2]], 1 << 2);
3561                         break;
3562                 }
3563         }
3564
3565         if (code->nins < IA64_INS_BUFFER_SIZE && !flush)
3566                 /* Wait for more instructions */
3567                 return;
3568
3569         if ((code->nins >= 2) && !code->one_ins_per_bundle) {
3570                 /* Try a nop in the middle */
3571                 for (template = 0; template < 32; ++template) {
3572                         if (((stops_in_template [template][0] != stops [0]) &&
3573                                  (stops_in_template [template][1] != stops [0])) ||
3574                                 stops_in_template [template][2] != stops [1])
3575                                 continue;
3576
3577                         if (!ITYPE_MATCH (ins_types_in_template [template][0], code->itypes [0]) ||
3578                                 !ITYPE_MATCH (ins_types_in_template [template][2], code->itypes [1]))
3579                                 continue;
3580
3581                         if (!debug_ins_sched ())
3582                                 continue;
3583
3584                         ia64_real_emit_bundle (code, deps_start, stops, 2, template, code->instructions [0], nops_for_ins_types [ins_types_in_template [template][1]], code->instructions [1], 1 << 1);
3585                         break;
3586                 }
3587         }
3588
3589         if ((code->nins >= 2) && flush && !code->one_ins_per_bundle) {
3590                 /* Try a nop at the beginning */
3591                 for (template = 0; template < 32; ++template) {
3592                         if ((stops_in_template [template][1] != stops [0]) ||
3593                                 (stops_in_template [template][2] != stops [1]))
3594                                 continue;
3595
3596                         if (!ITYPE_MATCH (ins_types_in_template [template][1], code->itypes [0]) ||
3597                                 !ITYPE_MATCH (ins_types_in_template [template][2], code->itypes [1]))
3598                                 continue;
3599
3600                         if (!debug_ins_sched ())
3601                                 continue;
3602
3603                         ia64_real_emit_bundle (code, deps_start, stops, 2, template, nops_for_ins_types [ins_types_in_template [template][0]], code->instructions [0], code->instructions [1], 1 << 0);
3604                         break;
3605                 }
3606         }
3607
3608         if (code->nins < IA64_INS_BUFFER_SIZE && !flush)
3609                 /* Wait for more instructions */
3610                 return;
3611
3612         if (flush)
3613                 nins_to_emit = code->nins;
3614         else
3615                 nins_to_emit = 1;
3616
3617         while (nins_to_emit > 0) {
3618                 if (!debug_ins_sched ())
3619                         stops [0] = 1;
3620                 switch (code->itypes [0]) {
3621                 case IA64_INS_TYPE_A:
3622                         if (stops [0])
3623                                 ia64_real_emit_bundle (code, deps_start, stops, 1, IA64_TEMPLATE_MIIS, code->instructions [0], IA64_NOP_I, IA64_NOP_I, 0);
3624                         else
3625                                 ia64_real_emit_bundle (code, deps_start, stops, 1, IA64_TEMPLATE_MII, code->instructions [0], IA64_NOP_I, IA64_NOP_I, 0);
3626                         break;
3627                 case IA64_INS_TYPE_I:
3628                         if (stops [0])
3629                                 ia64_real_emit_bundle (code, deps_start, stops, 1, IA64_TEMPLATE_MIIS, IA64_NOP_M, code->instructions [0], IA64_NOP_I, 0);
3630                         else
3631                                 ia64_real_emit_bundle (code, deps_start, stops, 1, IA64_TEMPLATE_MII, IA64_NOP_M, code->instructions [0], IA64_NOP_I, 0);
3632                         break;
3633                 case IA64_INS_TYPE_M:
3634                         if (stops [0])
3635                                 ia64_real_emit_bundle (code, deps_start, stops, 1, IA64_TEMPLATE_MIIS, code->instructions [0], IA64_NOP_I, IA64_NOP_I, 0);
3636                         else
3637                                 ia64_real_emit_bundle (code, deps_start, stops, 1, IA64_TEMPLATE_MII, code->instructions [0], IA64_NOP_I, IA64_NOP_I, 0);
3638                         break;
3639                 case IA64_INS_TYPE_B:
3640                         if (stops [0])
3641                                 ia64_real_emit_bundle (code, deps_start, stops, 1, IA64_TEMPLATE_MIBS, IA64_NOP_M, IA64_NOP_I, code->instructions [0], 0);
3642                         else
3643                                 ia64_real_emit_bundle (code, deps_start, stops, 1, IA64_TEMPLATE_MIB, IA64_NOP_M, IA64_NOP_I, code->instructions [0], 0);
3644                         break;
3645                 case IA64_INS_TYPE_F:
3646                         if (stops [0])
3647                                 ia64_real_emit_bundle (code, deps_start, stops, 1, IA64_TEMPLATE_MFIS, IA64_NOP_M, code->instructions [0], IA64_NOP_I, 0);
3648                         else
3649                                 ia64_real_emit_bundle (code, deps_start, stops, 1, IA64_TEMPLATE_MFI, IA64_NOP_M, code->instructions [0], IA64_NOP_I, 0);
3650                         break;
3651                 case IA64_INS_TYPE_LX:
3652                         if (stops [0] || stops [1])
3653                                 ia64_real_emit_bundle (code, deps_start, stops, 2, IA64_TEMPLATE_MLXS, IA64_NOP_M, code->instructions [0], code->instructions [1], 0);
3654                         else
3655                                 ia64_real_emit_bundle (code, deps_start, stops, 2, IA64_TEMPLATE_MLX, IA64_NOP_M, code->instructions [0], code->instructions [1], 0);
3656                         nins_to_emit --;
3657                         break;
3658                 default:
3659                         g_assert_not_reached ();
3660                 }
3661                 nins_to_emit --;
3662         }
3663 }
3664
3665 unw_dyn_region_info_t*
3666 mono_ia64_create_unwind_region (Ia64CodegenState *code)
3667 {
3668         unw_dyn_region_info_t *r;
3669
3670         g_assert (code->nins == 0);
3671         r = g_malloc0 (_U_dyn_region_info_size (code->unw_op_count));
3672         memcpy (&r->op, &code->unw_ops, sizeof (unw_dyn_op_t) * code->unw_op_count);
3673         r->op_count = code->unw_op_count;
3674         r->insn_count = ((code->buf - code->region_start) >> 4) * 3;
3675         code->unw_op_count = 0;
3676         code->unw_op_pos = 0;
3677         code->region_start = code->buf;
3678
3679         return r;
3680 }
3681
3682 static void 
3683 ia64_patch (unsigned char* code, gpointer target)
3684 {
3685         int template, i;
3686         guint64 instructions [3];
3687         guint8 gen_buf [16];
3688         Ia64CodegenState gen;
3689         int ins_to_skip;
3690         gboolean found;
3691
3692         /* 
3693          * code encodes both the position inside the buffer and code.nins when
3694          * the instruction was emitted.
3695          */
3696         ins_to_skip = (guint64)code % 16;
3697         code = (unsigned char*)((guint64)code & ~15);
3698
3699         /*
3700          * Search for the first instruction which is 'patchable', skipping
3701          * ins_to_skip instructions.
3702          */
3703
3704         while (TRUE) {
3705
3706         template = ia64_bundle_template (code);
3707         instructions [0] = ia64_bundle_ins1 (code);
3708         instructions [1] = ia64_bundle_ins2 (code);
3709         instructions [2] = ia64_bundle_ins3 (code);
3710
3711         ia64_codegen_init (gen, gen_buf);
3712
3713         found = FALSE;
3714         for (i = 0; i < 3; ++i) {
3715                 guint64 ins = instructions [i];
3716                 int opcode = ia64_ins_opcode (ins);
3717
3718                 if (ins == nops_for_ins_types [ins_types_in_template [template][i]])
3719                         continue;
3720
3721                 if (ins_to_skip) {
3722                         ins_to_skip --;
3723                         continue;
3724                 }
3725
3726                 switch (ins_types_in_template [template][i]) {
3727                 case IA64_INS_TYPE_A:
3728                 case IA64_INS_TYPE_M:
3729                         if ((opcode == 8) && (ia64_ins_x2a (ins) == 2) && (ia64_ins_ve (ins) == 0)) {
3730                                 /* adds */
3731                                 ia64_adds_imm_pred (gen, ia64_ins_qp (ins), ia64_ins_r1 (ins), (guint64)target, ia64_ins_r3 (ins));
3732                                 instructions [i] = gen.instructions [0];
3733                                 found = TRUE;
3734                         }
3735                         else
3736                                 NOT_IMPLEMENTED;
3737                         break;
3738                 case IA64_INS_TYPE_B:
3739                         if ((opcode == 4) && (ia64_ins_btype (ins) == 0)) {
3740                                 /* br.cond */
3741                                 gint64 disp = ((guint8*)target - code) >> 4;
3742
3743                                 /* FIXME: hints */
3744                                 ia64_br_cond_hint_pred (gen, ia64_ins_qp (ins), disp, 0, 0, 0);
3745                                 
3746                                 instructions [i] = gen.instructions [0];
3747                                 found = TRUE;
3748                         }
3749                         else if (opcode == 5) {
3750                                 /* br.call */
3751                                 gint64 disp = ((guint8*)target - code) >> 4;
3752
3753                                 /* FIXME: hints */
3754                                 ia64_br_call_hint_pred (gen, ia64_ins_qp (ins), ia64_ins_b1 (ins), disp, 0, 0, 0);
3755                                 instructions [i] = gen.instructions [0];
3756                                 found = TRUE;
3757                         }
3758                         else
3759                                 NOT_IMPLEMENTED;
3760                         break;
3761                 case IA64_INS_TYPE_LX:
3762                         if (i == 1)
3763                                 break;
3764
3765                         if ((opcode == 6) && (ia64_ins_vc (ins) == 0)) {
3766                                 /* movl */
3767                                 ia64_movl_pred (gen, ia64_ins_qp (ins), ia64_ins_r1 (ins), target);
3768                                 instructions [1] = gen.instructions [0];
3769                                 instructions [2] = gen.instructions [1];
3770                                 found = TRUE;
3771                         }
3772                         else
3773                                 NOT_IMPLEMENTED;
3774
3775                         break;
3776                 default:
3777                         NOT_IMPLEMENTED;
3778                 }
3779
3780                 if (found) {
3781                         /* Rewrite code */
3782                         ia64_codegen_init (gen, code);
3783                         ia64_emit_bundle_template (&gen, template, instructions [0], instructions [1], instructions [2]);
3784                         return;
3785                 }
3786         }
3787
3788         code += 16;
3789         }
3790 }
3791
3792 void
3793 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
3794 {
3795         MonoJumpInfo *patch_info;
3796
3797         for (patch_info = ji; patch_info; patch_info = patch_info->next) {
3798                 unsigned char *ip = patch_info->ip.i + code;
3799                 const unsigned char *target;
3800
3801                 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
3802
3803                 if (patch_info->type == MONO_PATCH_INFO_NONE)
3804                         continue;
3805                 if (mono_compile_aot) {
3806                         NOT_IMPLEMENTED;
3807                 }
3808
3809                 ia64_patch (ip, (gpointer)target);
3810         }
3811 }
3812
3813 guint8 *
3814 mono_arch_emit_prolog (MonoCompile *cfg)
3815 {
3816         MonoMethod *method = cfg->method;
3817         MonoMethodSignature *sig;
3818         MonoInst *inst;
3819         int alloc_size, pos, i;
3820         Ia64CodegenState code;
3821         CallInfo *cinfo;
3822         
3823         sig = mono_method_signature (method);
3824         pos = 0;
3825
3826         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
3827
3828         cfg->code_size =  MAX (((MonoMethodNormal *)method)->header->code_size * 4, 512);
3829
3830         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
3831                 cfg->code_size += 1024;
3832         if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
3833                 cfg->code_size += 1024;
3834
3835         cfg->native_code = g_malloc (cfg->code_size);
3836
3837         ia64_codegen_init (code, cfg->native_code);
3838
3839         alloc_size = ALIGN_TO (cfg->stack_offset, MONO_ARCH_FRAME_ALIGNMENT);
3840         if (cfg->param_area)
3841                 alloc_size += cfg->param_area;
3842         if (alloc_size)
3843                 /* scratch area */
3844                 alloc_size += 16;
3845         alloc_size = ALIGN_TO (alloc_size, MONO_ARCH_FRAME_ALIGNMENT);
3846
3847         if (cfg->flags & MONO_CFG_HAS_ALLOCA)
3848                 /* Force sp to be saved/restored */
3849                 alloc_size += MONO_ARCH_FRAME_ALIGNMENT;
3850
3851         cfg->arch.stack_alloc_size = alloc_size;
3852
3853         pos = 0;
3854
3855         if (method->save_lmf) {
3856                 /* No LMF on IA64 */
3857         }
3858
3859         alloc_size -= pos;
3860
3861         ia64_unw_save_reg (code, UNW_IA64_AR_PFS, UNW_IA64_GR + cfg->arch.reg_saved_ar_pfs);
3862         ia64_alloc (code, cfg->arch.reg_saved_ar_pfs, cfg->arch.reg_local0 - cfg->arch.reg_in0, cfg->arch.reg_out0 - cfg->arch.reg_local0, cfg->arch.n_out_regs, 0);
3863         ia64_unw_save_reg (code, UNW_IA64_RP, UNW_IA64_GR + cfg->arch.reg_saved_b0);
3864         ia64_mov_from_br (code, cfg->arch.reg_saved_b0, IA64_B0);
3865
3866         if ((alloc_size || cinfo->stack_usage) && !cfg->arch.omit_fp) {
3867                 ia64_unw_save_reg (code, UNW_IA64_SP, UNW_IA64_GR + cfg->arch.reg_saved_sp);
3868                 ia64_mov (code, cfg->arch.reg_saved_sp, IA64_SP);
3869                 if (cfg->frame_reg != cfg->arch.reg_saved_sp)
3870                         ia64_mov (code, cfg->frame_reg, IA64_SP);
3871         }
3872
3873         if (alloc_size) {
3874 #if defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
3875                 int pagesize = getpagesize ();
3876
3877                 if (alloc_size >= pagesize) {
3878                         gint32 remaining_size = alloc_size;
3879
3880                         /* Generate stack touching code */
3881                         ia64_mov (code, GP_SCRATCH_REG, IA64_SP);                       
3882                         while (remaining_size >= pagesize) {
3883                                 ia64_movl (code, GP_SCRATCH_REG2, pagesize);
3884                                 ia64_sub (code, GP_SCRATCH_REG, GP_SCRATCH_REG, GP_SCRATCH_REG2);
3885                                 ia64_ld8 (code, GP_SCRATCH_REG2, GP_SCRATCH_REG);
3886                                 remaining_size -= pagesize;
3887                         }
3888                 }
3889 #endif
3890                 if (ia64_is_imm14 (-alloc_size)) {
3891                         if (cfg->arch.omit_fp)
3892                                 ia64_unw_add (code, UNW_IA64_SP, (-alloc_size));
3893                         ia64_adds_imm (code, IA64_SP, (-alloc_size), IA64_SP);
3894                 }
3895                 else {
3896                         ia64_movl (code, GP_SCRATCH_REG, -alloc_size);
3897                         if (cfg->arch.omit_fp)
3898                                 ia64_unw_add (code, UNW_IA64_SP, (-alloc_size));
3899                         ia64_add (code, IA64_SP, GP_SCRATCH_REG, IA64_SP);
3900                 }
3901         }
3902
3903         ia64_begin_bundle (code);
3904
3905         /* Initialize unwind info */
3906         cfg->arch.r_pro = mono_ia64_create_unwind_region (&code);
3907
3908         if (sig->ret->type != MONO_TYPE_VOID) {
3909                 if ((cinfo->ret.storage == ArgInIReg) && (cfg->ret->opcode != OP_REGVAR)) {
3910                         /* Save volatile arguments to the stack */
3911                         NOT_IMPLEMENTED;
3912                 }
3913         }
3914
3915         /* Keep this in sync with emit_load_volatile_arguments */
3916         for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
3917                 ArgInfo *ainfo = cinfo->args + i;
3918                 gint32 stack_offset;
3919                 MonoType *arg_type;
3920
3921                 inst = cfg->args [i];
3922
3923                 if (sig->hasthis && (i == 0))
3924                         arg_type = &mono_defaults.object_class->byval_arg;
3925                 else
3926                         arg_type = sig->params [i - sig->hasthis];
3927
3928                 arg_type = mono_type_get_underlying_type (arg_type);
3929
3930                 stack_offset = ainfo->offset + ARGS_OFFSET;
3931
3932                 /*
3933                  * FIXME: Native code might pass non register sized integers 
3934                  * without initializing the upper bits.
3935                  */
3936                 if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED && !arg_type->byref && ainfo->storage == ArgInIReg) {
3937                         int reg = cfg->arch.reg_in0 + ainfo->reg;
3938
3939                         switch (mono_type_to_load_membase (cfg, arg_type)) {
3940                         case OP_LOADI1_MEMBASE:
3941                                 ia64_sxt1 (code, reg, reg);
3942                                 break;
3943                         case OP_LOADU1_MEMBASE:
3944                                 ia64_zxt1 (code, reg, reg);
3945                                 break;
3946                         case OP_LOADI2_MEMBASE:
3947                                 ia64_sxt2 (code, reg, reg);
3948                                 break;
3949                         case OP_LOADU2_MEMBASE:
3950                                 ia64_zxt2 (code, reg, reg);
3951                                 break;
3952                         default:
3953                                 break;
3954                         }
3955                 }
3956
3957                 /* Save volatile arguments to the stack */
3958                 if (inst->opcode != OP_REGVAR) {
3959                         switch (ainfo->storage) {
3960                         case ArgInIReg:
3961                         case ArgInFloatReg:
3962                         case ArgInFloatRegR4:
3963                                 g_assert (inst->opcode == OP_REGOFFSET);
3964                                 if (ia64_is_adds_imm (inst->inst_offset))
3965                                         ia64_adds_imm (code, GP_SCRATCH_REG, inst->inst_offset, inst->inst_basereg);
3966                                 else {
3967                                         ia64_movl (code, GP_SCRATCH_REG2, inst->inst_offset);
3968                                         ia64_add (code, GP_SCRATCH_REG, GP_SCRATCH_REG, GP_SCRATCH_REG2);
3969                                 }
3970                                 if (arg_type->byref)
3971                                         ia64_st8_hint (code, GP_SCRATCH_REG, cfg->arch.reg_in0 + ainfo->reg, 0);
3972                                 else {
3973                                         switch (arg_type->type) {
3974                                         case MONO_TYPE_R4:
3975                                                 ia64_stfs_hint (code, GP_SCRATCH_REG, ainfo->reg, 0);
3976                                                 break;
3977                                         case MONO_TYPE_R8:
3978                                                 ia64_stfd_hint (code, GP_SCRATCH_REG, ainfo->reg, 0);
3979                                                 break;
3980                                         default:
3981                                                 ia64_st8_hint (code, GP_SCRATCH_REG, cfg->arch.reg_in0 + ainfo->reg, 0);
3982                                                 break;
3983                                         }
3984                                 }
3985                                 break;
3986                         case ArgOnStack:
3987                                 break;
3988                         case ArgAggregate:
3989                                 if (ainfo->nslots != ainfo->nregs)
3990                                         NOT_IMPLEMENTED;
3991
3992                                 g_assert (inst->opcode == OP_REGOFFSET);
3993                                 ia64_adds_imm (code, GP_SCRATCH_REG, inst->inst_offset, inst->inst_basereg);
3994                                 for (i = 0; i < ainfo->nregs; ++i) {
3995                                         switch (ainfo->atype) {
3996                                         case AggregateNormal:
3997                                                 ia64_st8_inc_imm_hint (code, GP_SCRATCH_REG, cfg->arch.reg_in0 + ainfo->reg + i, sizeof (gpointer), 0);
3998                                                 break;
3999                                         case AggregateSingleHFA:
4000                                                 ia64_stfs_inc_imm_hint (code, GP_SCRATCH_REG, ainfo->reg + i, 4, 0);
4001                                                 break;
4002                                         case AggregateDoubleHFA:
4003                                                 ia64_stfd_inc_imm_hint (code, GP_SCRATCH_REG, ainfo->reg + i, sizeof (gpointer), 0);
4004                                                 break;
4005                                         default:
4006                                                 NOT_IMPLEMENTED;
4007                                         }
4008                                 }
4009                                 break;
4010                         default:
4011                                 g_assert_not_reached ();
4012                         }
4013                 }
4014
4015                 if (inst->opcode == OP_REGVAR) {
4016                         /* Argument allocated to (non-volatile) register */
4017                         switch (ainfo->storage) {
4018                         case ArgInIReg:
4019                                 if (inst->dreg != cfg->arch.reg_in0 + ainfo->reg)
4020                                         ia64_mov (code, inst->dreg, cfg->arch.reg_in0 + ainfo->reg);
4021                                 break;
4022                         case ArgOnStack:
4023                                 ia64_adds_imm (code, GP_SCRATCH_REG, 16 + ainfo->offset, cfg->frame_reg);
4024                                 ia64_ld8 (code, inst->dreg, GP_SCRATCH_REG);
4025                                 break;
4026                         default:
4027                                 NOT_IMPLEMENTED;
4028                         }
4029                 }
4030         }
4031
4032         if (method->save_lmf) {
4033                 /* No LMF on IA64 */
4034         }
4035
4036         ia64_codegen_close (code);
4037
4038         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4039                 code.buf = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code.buf, TRUE);
4040
4041         cfg->code_len = code.buf - cfg->native_code;
4042
4043         g_assert (cfg->code_len < cfg->code_size);
4044
4045         cfg->arch.prolog_end_offset = cfg->code_len;
4046
4047         return code.buf;
4048 }
4049
4050 void
4051 mono_arch_emit_epilog (MonoCompile *cfg)
4052 {
4053         MonoMethod *method = cfg->method;
4054         int i, pos;
4055         int max_epilog_size = 16 * 4;
4056         Ia64CodegenState code;
4057         guint8 *buf;
4058         CallInfo *cinfo;
4059         ArgInfo *ainfo;
4060
4061         if (mono_jit_trace_calls != NULL)
4062                 max_epilog_size += 1024;
4063
4064         cfg->arch.epilog_begin_offset = cfg->code_len;
4065
4066         while (cfg->code_len + max_epilog_size > cfg->code_size) {
4067                 cfg->code_size *= 2;
4068                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4069                 mono_jit_stats.code_reallocs++;
4070         }
4071
4072         /* FIXME: Emit unwind info */
4073
4074         buf = cfg->native_code + cfg->code_len;
4075
4076         if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4077                 buf = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, buf, TRUE);
4078
4079         ia64_codegen_init (code, buf);
4080
4081         /* the code restoring the registers must be kept in sync with OP_JMP */
4082         pos = 0;
4083         
4084         if (method->save_lmf) {
4085                 /* No LMF on IA64 */
4086         }
4087
4088         /* Load returned vtypes into registers if needed */
4089         cinfo = get_call_info (cfg, cfg->mempool, mono_method_signature (method), FALSE);
4090         ainfo = &cinfo->ret;
4091         switch (ainfo->storage) {
4092         case ArgAggregate:
4093                 if (ainfo->nslots != ainfo->nregs)
4094                         NOT_IMPLEMENTED;
4095
4096                 g_assert (cfg->ret->opcode == OP_REGOFFSET);
4097                 ia64_adds_imm (code, GP_SCRATCH_REG, cfg->ret->inst_offset, cfg->ret->inst_basereg);
4098                 for (i = 0; i < ainfo->nregs; ++i) {
4099                         switch (ainfo->atype) {
4100                         case AggregateNormal:
4101                                 ia64_ld8_inc_imm_hint (code, ainfo->reg + i, GP_SCRATCH_REG, sizeof (gpointer), 0);
4102                                 break;
4103                         case AggregateSingleHFA:
4104                                 ia64_ldfs_inc_imm_hint (code, ainfo->reg + i, GP_SCRATCH_REG, 4, 0);
4105                                 break;
4106                         case AggregateDoubleHFA:
4107                                 ia64_ldfd_inc_imm_hint (code, ainfo->reg + i, GP_SCRATCH_REG, sizeof (gpointer), 0);
4108                                 break;
4109                         default:
4110                                 g_assert_not_reached ();
4111                         }
4112                 }
4113                 break;
4114         default:
4115                 break;
4116         }
4117
4118         ia64_begin_bundle (code);
4119
4120         code.region_start = cfg->native_code;
4121
4122         /* Label the unwind state at the start of the exception throwing region */
4123         //ia64_unw_label_state (code, 1234);
4124
4125         if (cfg->arch.stack_alloc_size) {
4126                 if (cfg->arch.omit_fp) {
4127                         if (ia64_is_imm14 (cfg->arch.stack_alloc_size)) {
4128                                 ia64_unw_pop_frames (code, 1);
4129                                 ia64_adds_imm (code, IA64_SP, (cfg->arch.stack_alloc_size), IA64_SP);
4130                         } else {
4131                                 ia64_movl (code, GP_SCRATCH_REG, cfg->arch.stack_alloc_size);
4132                                 ia64_unw_pop_frames (code, 1);
4133                                 ia64_add (code, IA64_SP, GP_SCRATCH_REG, IA64_SP);
4134                         }
4135                 }
4136                 else {
4137                         ia64_unw_pop_frames (code, 1);
4138                         ia64_mov (code, IA64_SP, cfg->arch.reg_saved_sp);
4139                 }
4140         }
4141         ia64_mov_to_ar_i (code, IA64_PFS, cfg->arch.reg_saved_ar_pfs);
4142         ia64_mov_ret_to_br (code, IA64_B0, cfg->arch.reg_saved_b0);
4143         ia64_br_ret_reg (code, IA64_B0);
4144
4145         ia64_codegen_close (code);
4146
4147         cfg->arch.r_epilog = mono_ia64_create_unwind_region (&code);
4148         cfg->arch.r_pro->next = cfg->arch.r_epilog;
4149
4150         cfg->code_len = code.buf - cfg->native_code;
4151
4152         g_assert (cfg->code_len < cfg->code_size);
4153 }
4154
4155 void
4156 mono_arch_emit_exceptions (MonoCompile *cfg)
4157 {
4158         MonoJumpInfo *patch_info;
4159         int i, nthrows;
4160         Ia64CodegenState code;
4161         gboolean empty = TRUE;
4162         //unw_dyn_region_info_t *r_exceptions;
4163         MonoClass *exc_classes [16];
4164         guint8 *exc_throw_start [16], *exc_throw_end [16];
4165         guint32 code_size = 0;
4166
4167         /* Compute needed space */
4168         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4169                 if (patch_info->type == MONO_PATCH_INFO_EXC)
4170                         code_size += 256;
4171                 if (patch_info->type == MONO_PATCH_INFO_R8)
4172                         code_size += 8 + 7; /* sizeof (double) + alignment */
4173                 if (patch_info->type == MONO_PATCH_INFO_R4)
4174                         code_size += 4 + 7; /* sizeof (float) + alignment */
4175         }
4176
4177         if (code_size == 0)
4178                 return;
4179
4180         while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4181                 cfg->code_size *= 2;
4182                 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4183                 mono_jit_stats.code_reallocs++;
4184         }
4185
4186         ia64_codegen_init (code, cfg->native_code + cfg->code_len);
4187
4188         /* The unwind state here is the same as before the epilog */
4189         //ia64_unw_copy_state (code, 1234);
4190
4191         /* add code to raise exceptions */
4192         /* FIXME: Optimize this */
4193         nthrows = 0;
4194         for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4195                 switch (patch_info->type) {
4196                 case MONO_PATCH_INFO_EXC: {
4197                         MonoClass *exc_class;
4198                         guint8* throw_ip;
4199                         guint8* buf;
4200                         guint64 exc_token_index;
4201
4202                         exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4203                         g_assert (exc_class);
4204                         exc_token_index = mono_metadata_token_index (exc_class->type_token);
4205                         throw_ip = cfg->native_code + patch_info->ip.i;
4206
4207                         ia64_begin_bundle (code);
4208
4209                         ia64_patch (cfg->native_code + patch_info->ip.i, code.buf);
4210
4211                         /* Find a throw sequence for the same exception class */
4212                         for (i = 0; i < nthrows; ++i)
4213                                 if (exc_classes [i] == exc_class)
4214                                         break;
4215
4216                         if (i < nthrows) {
4217                                 gint64 offset = exc_throw_end [i] - 16 - throw_ip;
4218
4219                                 if (ia64_is_adds_imm (offset))
4220                                         ia64_adds_imm (code, cfg->arch.reg_out0 + 1, offset, IA64_R0);
4221                                 else
4222                                         ia64_movl (code, cfg->arch.reg_out0 + 1, offset);
4223
4224                                 buf = code.buf + code.nins;
4225                                 ia64_br_cond_pred (code, 0, 0);
4226                                 ia64_begin_bundle (code);
4227                                 ia64_patch (buf, exc_throw_start [i]);
4228
4229                                 patch_info->type = MONO_PATCH_INFO_NONE;
4230                         }
4231                         else {
4232                                 /* Arg1 */
4233                                 buf = code.buf;
4234                                 ia64_movl (code, cfg->arch.reg_out0 + 1, 0);
4235
4236                                 ia64_begin_bundle (code);
4237
4238                                 if (nthrows < 16) {
4239                                         exc_classes [nthrows] = exc_class;
4240                                         exc_throw_start [nthrows] = code.buf;
4241                                 }
4242
4243                                 /* Arg2 */
4244                                 if (ia64_is_adds_imm (exc_token_index))
4245                                         ia64_adds_imm (code, cfg->arch.reg_out0 + 0, exc_token_index, IA64_R0);
4246                                 else
4247                                         ia64_movl (code, cfg->arch.reg_out0 + 0, exc_token_index);
4248
4249                                 patch_info->data.name = "mono_arch_throw_corlib_exception";
4250                                 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4251                                 patch_info->ip.i = code.buf + code.nins - cfg->native_code;
4252
4253                                 /* Indirect call */
4254                                 ia64_movl (code, GP_SCRATCH_REG, 0);
4255                                 ia64_ld8_inc_imm (code, GP_SCRATCH_REG2, GP_SCRATCH_REG, 8);
4256                                 ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG2);
4257                                 ia64_ld8 (code, IA64_GP, GP_SCRATCH_REG);
4258
4259                                 ia64_br_call_reg (code, IA64_B0, IA64_B6);
4260
4261                                 /* Patch up the throw offset */
4262                                 ia64_begin_bundle (code);
4263
4264                                 ia64_patch (buf, (gpointer)(code.buf - 16 - throw_ip));
4265
4266                                 if (nthrows < 16) {
4267                                         exc_throw_end [nthrows] = code.buf;
4268                                         nthrows ++;
4269                                 }
4270                         }
4271
4272                         empty = FALSE;
4273                         break;
4274                 }
4275                 default:
4276                         break;
4277                 }
4278         }
4279
4280         if (!empty)
4281                 /* The unwinder needs this to work */
4282                 ia64_break_i (code, 0);
4283
4284         ia64_codegen_close (code);
4285
4286         /* FIXME: */
4287         //r_exceptions = mono_ia64_create_unwind_region (&code);
4288         //cfg->arch.r_epilog = r_exceptions;
4289
4290         cfg->code_len = code.buf - cfg->native_code;
4291
4292         g_assert (cfg->code_len < cfg->code_size);
4293 }
4294
4295 void*
4296 mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4297 {
4298         Ia64CodegenState code;
4299         CallInfo *cinfo = NULL;
4300         MonoMethodSignature *sig;
4301         MonoInst *ins;
4302         int i, n, stack_area = 0;
4303
4304         ia64_codegen_init (code, p);
4305
4306         /* Keep this in sync with mono_arch_get_argument_info */
4307
4308         if (enable_arguments) {
4309                 /* Allocate a new area on the stack and save arguments there */
4310                 sig = mono_method_signature (cfg->method);
4311
4312                 cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
4313
4314                 n = sig->param_count + sig->hasthis;
4315
4316                 stack_area = ALIGN_TO (n * 8, 16);
4317
4318                 if (n) {
4319                         ia64_movl (code, GP_SCRATCH_REG, stack_area);
4320
4321                         ia64_sub (code, IA64_SP, IA64_SP, GP_SCRATCH_REG);
4322
4323                         /* FIXME: Allocate out registers */
4324
4325                         ia64_mov (code, cfg->arch.reg_out0 + 1, IA64_SP);
4326
4327                         /* Required by the ABI */
4328                         ia64_adds_imm (code, IA64_SP, -16, IA64_SP);
4329
4330                         add_patch_info (cfg, code, MONO_PATCH_INFO_METHODCONST, cfg->method);
4331                         ia64_movl (code, cfg->arch.reg_out0 + 0, 0);
4332
4333                         /* Save arguments to the stack */
4334                         for (i = 0; i < n; ++i) {
4335                                 ins = cfg->args [i];
4336
4337                                 if (ins->opcode == OP_REGVAR) {
4338                                         ia64_movl (code, GP_SCRATCH_REG, (i * 8));
4339                                         ia64_add (code, GP_SCRATCH_REG, cfg->arch.reg_out0 + 1, GP_SCRATCH_REG);
4340                                         ia64_st8 (code, GP_SCRATCH_REG, ins->dreg);
4341                                 }
4342                                 else {
4343                                         ia64_movl (code, GP_SCRATCH_REG, ins->inst_offset);
4344                                         ia64_add (code, GP_SCRATCH_REG, ins->inst_basereg, GP_SCRATCH_REG);
4345                                         ia64_ld8 (code, GP_SCRATCH_REG2, GP_SCRATCH_REG);
4346                                         ia64_movl (code, GP_SCRATCH_REG, (i * 8));                              
4347                                         ia64_add (code, GP_SCRATCH_REG, cfg->arch.reg_out0 + 1, GP_SCRATCH_REG);
4348                                         ia64_st8 (code, GP_SCRATCH_REG, GP_SCRATCH_REG2);
4349                                 }
4350                         }
4351                 }
4352                 else
4353                         ia64_mov (code, cfg->arch.reg_out0 + 1, IA64_R0);
4354         }
4355         else
4356                 ia64_mov (code, cfg->arch.reg_out0 + 1, IA64_R0);
4357
4358         add_patch_info (cfg, code, MONO_PATCH_INFO_METHODCONST, cfg->method);
4359         ia64_movl (code, cfg->arch.reg_out0 + 0, 0);
4360
4361         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);
4362
4363         if (enable_arguments && stack_area) {
4364                 ia64_movl (code, GP_SCRATCH_REG, stack_area);
4365
4366                 ia64_add (code, IA64_SP, IA64_SP, GP_SCRATCH_REG);
4367
4368                 ia64_adds_imm (code, IA64_SP, 16, IA64_SP);
4369         }
4370
4371         ia64_codegen_close (code);
4372
4373         return code.buf;
4374 }
4375
4376 void*
4377 mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
4378 {
4379         Ia64CodegenState code;
4380         CallInfo *cinfo = NULL;
4381         MonoMethod *method = cfg->method;
4382         MonoMethodSignature *sig = mono_method_signature (cfg->method);
4383
4384         ia64_codegen_init (code, p);
4385
4386         cinfo = get_call_info (cfg, cfg->mempool, sig, FALSE);
4387
4388         /* Save return value + pass it to func */
4389         switch (cinfo->ret.storage) {
4390         case ArgNone:
4391                 break;
4392         case ArgInIReg:
4393                 ia64_mov (code, cfg->arch.reg_saved_return_val, cinfo->ret.reg);
4394                 ia64_mov (code, cfg->arch.reg_out0 + 1, cinfo->ret.reg);
4395                 break;
4396         case ArgInFloatReg:
4397                 ia64_adds_imm (code, IA64_SP, -16, IA64_SP);
4398                 ia64_adds_imm (code, GP_SCRATCH_REG, 16, IA64_SP);
4399                 ia64_stfd_hint (code, GP_SCRATCH_REG, cinfo->ret.reg, 0);
4400                 ia64_fmov (code, 8 + 1, cinfo->ret.reg);
4401                 break;
4402         case ArgValuetypeAddrInIReg:
4403                 ia64_mov (code, cfg->arch.reg_out0 + 1, cfg->arch.reg_in0 + cinfo->ret.reg);
4404                 break;
4405         case ArgAggregate:
4406                 NOT_IMPLEMENTED;
4407                 break;
4408         default:
4409                 break;
4410         }
4411
4412         add_patch_info (cfg, code, MONO_PATCH_INFO_METHODCONST, method);
4413         ia64_movl (code, cfg->arch.reg_out0 + 0, 0);
4414         code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func);
4415
4416         /* Restore return value */
4417         switch (cinfo->ret.storage) {
4418         case ArgNone:
4419                 break;
4420         case ArgInIReg:
4421                 ia64_mov (code, cinfo->ret.reg, cfg->arch.reg_saved_return_val);
4422                 break;
4423         case ArgInFloatReg:
4424                 ia64_adds_imm (code, GP_SCRATCH_REG, 16, IA64_SP);
4425                 ia64_ldfd (code, cinfo->ret.reg, GP_SCRATCH_REG);
4426                 break;
4427         case ArgValuetypeAddrInIReg:
4428                 break;
4429         case ArgAggregate:
4430                 break;
4431         default:
4432                 break;
4433         }
4434
4435         ia64_codegen_close (code);
4436
4437         return code.buf;
4438 }
4439
4440 void
4441 mono_arch_save_unwind_info (MonoCompile *cfg)
4442 {
4443         unw_dyn_info_t *di;
4444
4445         /* FIXME: Unregister this for dynamic methods */
4446
4447         di = g_malloc0 (sizeof (unw_dyn_info_t));
4448         di->start_ip = (unw_word_t) cfg->native_code;
4449         di->end_ip = (unw_word_t) cfg->native_code + cfg->code_len;
4450         di->gp = 0;
4451         di->format = UNW_INFO_FORMAT_DYNAMIC;
4452         di->u.pi.name_ptr = (unw_word_t)mono_method_full_name (cfg->method, TRUE);
4453         di->u.pi.regions = cfg->arch.r_pro;
4454
4455         _U_dyn_register (di);
4456
4457         /*
4458         {
4459                 unw_dyn_region_info_t *region = di->u.pi.regions;
4460
4461                 printf ("Unwind info for method %s:\n", mono_method_full_name (cfg->method, TRUE));
4462                 while (region) {
4463                         printf ("    [Region: %d]\n", region->insn_count);
4464                         region = region->next;
4465                 }
4466         }
4467         */
4468 }
4469
4470 void
4471 mono_arch_flush_icache (guint8 *code, gint size)
4472 {
4473         guint8* p = (guint8*)((guint64)code & ~(0x3f));
4474         guint8* end = (guint8*)((guint64)code + size);
4475
4476 #ifdef __INTEL_COMPILER
4477         /* icc doesn't define an fc.i instrinsic, but fc==fc.i on itanium 2 */
4478         while (p < end) {
4479                 __fc ((guint64)p);
4480                 p += 32;
4481         }
4482 #else
4483         while (p < end) {
4484                 __asm__ __volatile__ ("fc.i %0"::"r"(p));
4485                 /* FIXME: This could be increased to 128 on some cpus */
4486                 p += 32;
4487         }
4488 #endif
4489 }
4490
/*
 * mono_arch_flush_register_windows:
 *
 *   Intentionally empty on IA64.
 */
void
mono_arch_flush_register_windows (void)
{
	/* libunwind makes an explicit flush unnecessary */
}
4496
4497 gboolean 
4498 mono_arch_is_inst_imm (gint64 imm)
4499 {
4500         /* The lowering pass will take care of it */
4501
4502         return TRUE;
4503 }
4504
4505 /*
4506  * Determine whenever the trap whose info is in SIGINFO is caused by
4507  * integer overflow.
4508  */
4509 gboolean
4510 mono_arch_is_int_overflow (void *sigctx, void *info)
4511 {
4512         /* Division is emulated with explicit overflow checks */
4513         return FALSE;
4514 }
4515
4516 guint32
4517 mono_arch_get_patch_offset (guint8 *code)
4518 {
4519         NOT_IMPLEMENTED;
4520
4521         return 0;
4522 }
4523
4524 gpointer
4525 mono_arch_get_vcall_slot (guint8* code, gpointer *regs, int *displacement)
4526 {
4527         guint8 *bundle2 = code - 48;
4528         guint8 *bundle3 = code - 32;
4529         guint8 *bundle4 = code - 16;
4530         guint64 ins21 = ia64_bundle_ins1 (bundle2);
4531         guint64 ins22 = ia64_bundle_ins2 (bundle2);
4532         guint64 ins23 = ia64_bundle_ins3 (bundle2);
4533         guint64 ins31 = ia64_bundle_ins1 (bundle3);
4534         guint64 ins32 = ia64_bundle_ins2 (bundle3);
4535         guint64 ins33 = ia64_bundle_ins3 (bundle3);
4536         guint64 ins41 = ia64_bundle_ins1 (bundle4);
4537         guint64 ins42 = ia64_bundle_ins2 (bundle4);
4538         guint64 ins43 = ia64_bundle_ins3 (bundle4);
4539
4540         /* 
4541          * Virtual calls are made with:
4542          *
4543          * [MII]       ld8 r31=[r8]
4544          *             nop.i 0x0
4545          *             nop.i 0x0;;
4546          * [MII]       nop.m 0x0
4547          *             mov.sptk b6=r31,0x2000000000f32a80
4548          *             nop.i 0x0
4549          * [MII]       nop.m 0x0
4550          *             nop.i 0x123456
4551          *             nop.i 0x0
4552          * [MIB]       nop.m 0x0
4553          *             nop.i 0x0
4554          *             br.call.sptk.few b0=b6;;
4555          */
4556
4557         if (((ia64_bundle_template (bundle3) == IA64_TEMPLATE_MII) ||
4558                  (ia64_bundle_template (bundle3) == IA64_TEMPLATE_MIIS)) &&
4559                 (ia64_bundle_template (bundle4) == IA64_TEMPLATE_MIBS) &&
4560                 (ins31 == IA64_NOP_M) && 
4561                 (ia64_ins_opcode (ins32) == 0) && (ia64_ins_x3 (ins32) == 0) && (ia64_ins_x6 (ins32) == 0x1) && (ia64_ins_y (ins32) == 0) &&
4562                 (ins33 == IA64_NOP_I) &&
4563                 (ins41 == IA64_NOP_M) &&
4564                 (ins42 == IA64_NOP_I) &&
4565                 (ia64_ins_opcode (ins43) == 1) && (ia64_ins_b1 (ins43) == 0) && (ia64_ins_b2 (ins43) == 6) &&
4566                 ((ins32 >> 6) & 0xfffff) == 0x12345) {
4567                 g_assert (ins21 == IA64_NOP_M);
4568                 g_assert (ins23 == IA64_NOP_I);
4569                 g_assert (ia64_ins_opcode (ins22) == 0);
4570                 g_assert (ia64_ins_x3 (ins22) == 7);
4571                 g_assert (ia64_ins_x (ins22) == 0);
4572                 g_assert (ia64_ins_b1 (ins22) == IA64_B6);
4573
4574                 *displacement = (gssize)regs [IA64_R8] - (gssize)regs [IA64_R11];
4575
4576                 return regs [IA64_R11];
4577         }
4578
4579         return NULL;
4580 }
4581
4582 gpointer*
4583 mono_arch_get_vcall_slot_addr (guint8* code, gpointer *regs)
4584 {
4585         gpointer vt;
4586         int displacement;
4587         vt = mono_arch_get_vcall_slot (code, regs, &displacement);
4588         if (!vt)
4589                 return NULL;
4590         return (gpointer*)(gpointer)((char*)vt + displacement);
4591 }
4592
4593 gpointer*
4594 mono_arch_get_delegate_method_ptr_addr (guint8* code, gpointer *regs)
4595 {
4596         NOT_IMPLEMENTED;
4597
4598         return NULL;
4599 }
4600
4601 static gboolean tls_offset_inited = FALSE;
4602
4603 void
4604 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4605 {
4606         if (!tls_offset_inited) {
4607                 tls_offset_inited = TRUE;
4608
4609                 appdomain_tls_offset = mono_domain_get_tls_offset ();
4610                 thread_tls_offset = mono_thread_get_tls_offset ();
4611         }               
4612 }
4613
4614 void
4615 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4616 {
4617 }
4618
4619 #ifdef MONO_ARCH_HAVE_IMT
4620
4621 /*
4622  * LOCKING: called with the domain lock held
4623  */
4624 gpointer
4625 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
4626         gpointer fail_tramp)
4627 {
4628         int i;
4629         int size = 0;
4630         guint8 *start, *buf;
4631         Ia64CodegenState code;
4632
4633         g_assert (!fail_tramp);
4634
4635         size = count * 256;
4636         buf = g_malloc0 (size);
4637         ia64_codegen_init (code, buf);
4638
4639         /* IA64_R9 contains the IMT method */
4640
4641         for (i = 0; i < count; ++i) {
4642                 MonoIMTCheckItem *item = imt_entries [i];
4643                 ia64_begin_bundle (code);
4644                 item->code_target = (guint8*)code.buf + code.nins;
4645                 if (item->is_equals) {
4646                         if (item->check_target_idx) {
4647                                 if (!item->compare_done) {
4648                                         ia64_movl (code, GP_SCRATCH_REG, item->key);
4649                                         ia64_cmp_eq (code, 6, 7, IA64_R9, GP_SCRATCH_REG);
4650                                 }
4651                                 item->jmp_code = (guint8*)code.buf + code.nins;
4652                                 ia64_br_cond_pred (code, 7, 0);
4653
4654                                 ia64_movl (code, GP_SCRATCH_REG, &(vtable->vtable [item->value.vtable_slot]));
4655                                 ia64_ld8 (code, GP_SCRATCH_REG, GP_SCRATCH_REG);
4656                                 ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG);
4657                                 ia64_br_cond_reg (code, IA64_B6);
4658                         } else {
4659                                 /* enable the commented code to assert on wrong method */
4660 #if ENABLE_WRONG_METHOD_CHECK
4661                                 g_assert_not_reached ();
4662 #endif
4663                                 ia64_movl (code, GP_SCRATCH_REG, &(vtable->vtable [item->value.vtable_slot]));
4664                                 ia64_ld8 (code, GP_SCRATCH_REG, GP_SCRATCH_REG);
4665                                 ia64_mov_to_br (code, IA64_B6, GP_SCRATCH_REG);
4666                                 ia64_br_cond_reg (code, IA64_B6);
4667 #if ENABLE_WRONG_METHOD_CHECK
4668                                 g_assert_not_reached ();
4669 #endif
4670                         }
4671                 } else {
4672                         ia64_movl (code, GP_SCRATCH_REG, item->key);
4673                         ia64_cmp_geu (code, 6, 7, IA64_R9, GP_SCRATCH_REG);
4674                         item->jmp_code = (guint8*)code.buf + code.nins;
4675                         ia64_br_cond_pred (code, 6, 0);
4676                 }
4677         }
4678         /* patch the branches to get to the target items */
4679         for (i = 0; i < count; ++i) {
4680                 MonoIMTCheckItem *item = imt_entries [i];
4681                 if (item->jmp_code) {
4682                         if (item->check_target_idx) {
4683                                 ia64_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
4684                         }
4685                 }
4686         }
4687
4688         ia64_codegen_close (code);
4689         g_assert (code.buf - buf <= size);
4690
4691         size = code.buf - buf;
4692         start = mono_code_manager_reserve (domain->code_mp, size);
4693         memcpy (start, buf, size);
4694
4695         mono_arch_flush_icache (start, size);
4696
4697         mono_stats.imt_thunks_size += size;
4698
4699         return start;
4700 }
4701
4702 MonoMethod*
4703 mono_arch_find_imt_method (gpointer *regs, guint8 *code)
4704 {
4705         return regs [IA64_R9];
4706 }
4707
4708 void
4709 mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call, MonoInst *imt_arg)
4710 {
4711         /* Done by the implementation of the CALL_MEMBASE opcodes */
4712 }
4713 #endif
4714
4715 gpointer
4716 mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gssize *regs, guint8 *code)
4717 {
4718         return (gpointer)regs [IA64_R10];
4719 }
4720
4721 MonoObject*
4722 mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
4723 {
4724         return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), (gssize*)regs, NULL);
4725 }
4726
4727 gpointer
4728 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
4729 {
4730         return NULL;
4731 }
4732
4733 MonoInst*
4734 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
4735 {
4736         MonoInst *ins = NULL;
4737
4738         if (cmethod->klass->image == mono_defaults.corlib &&
4739                 (strcmp (cmethod->klass->name_space, "System.Threading") == 0) &&
4740                 (strcmp (cmethod->klass->name, "Interlocked") == 0)) {
4741
4742                 /* 
4743                  * We don't use the generic version in mini_emit_inst_for_method () since we
4744                  * ia64 has atomic_add_imm opcodes.
4745                  */
4746                 if (strcmp (cmethod->name, "Increment") == 0) {
4747                         guint32 opcode;
4748
4749                         if (fsig->params [0]->type == MONO_TYPE_I4)
4750                                 opcode = OP_ATOMIC_ADD_IMM_NEW_I4;
4751                         else if (fsig->params [0]->type == MONO_TYPE_I8)
4752                                 opcode = OP_ATOMIC_ADD_IMM_NEW_I8;
4753                         else
4754                                 g_assert_not_reached ();
4755                         MONO_INST_NEW (cfg, ins, opcode);
4756                         ins->dreg = mono_alloc_preg (cfg);
4757                         ins->inst_imm = 1;
4758                         ins->inst_basereg = args [0]->dreg;
4759                         ins->inst_offset = 0;
4760                         MONO_ADD_INS (cfg->cbb, ins);
4761                 } else if (strcmp (cmethod->name, "Decrement") == 0) {
4762                         guint32 opcode;
4763
4764                         if (fsig->params [0]->type == MONO_TYPE_I4)
4765                                 opcode = OP_ATOMIC_ADD_IMM_NEW_I4;
4766                         else if (fsig->params [0]->type == MONO_TYPE_I8)
4767                                 opcode = OP_ATOMIC_ADD_IMM_NEW_I8;
4768                         else
4769                                 g_assert_not_reached ();
4770                         MONO_INST_NEW (cfg, ins, opcode);
4771                         ins->dreg = mono_alloc_preg (cfg);
4772                         ins->inst_imm = -1;
4773                         ins->inst_basereg = args [0]->dreg;
4774                         ins->inst_offset = 0;
4775                         MONO_ADD_INS (cfg->cbb, ins);
4776                 } else if (strcmp (cmethod->name, "Add") == 0) {
4777                         guint32 opcode;
4778                         gboolean is_imm = FALSE;
4779                         gint64 imm = 0;
4780
4781                         if ((args [1]->opcode == OP_ICONST) || (args [1]->opcode == OP_I8CONST)) {
4782                                 imm = (args [1]->opcode == OP_ICONST) ? args [1]->inst_c0 : args [1]->inst_l;
4783
4784                                 is_imm = (imm == 1 || imm == 4 || imm == 8 || imm == 16 || imm == -1 || imm == -4 || imm == -8 || imm == -16);
4785                         }
4786
4787                         if (is_imm) {
4788                                 if (fsig->params [0]->type == MONO_TYPE_I4)
4789                                         opcode = OP_ATOMIC_ADD_IMM_NEW_I4;
4790                                 else if (fsig->params [0]->type == MONO_TYPE_I8)
4791                                         opcode = OP_ATOMIC_ADD_IMM_NEW_I8;
4792                                 else
4793                                         g_assert_not_reached ();
4794
4795                                 MONO_INST_NEW (cfg, ins, opcode);
4796                                 ins->dreg = mono_alloc_ireg (cfg);
4797                                 ins->inst_basereg = args [0]->dreg;
4798                                 ins->inst_offset = 0;
4799                                 ins->inst_imm = imm;
4800                                 ins->type = (opcode == OP_ATOMIC_ADD_IMM_NEW_I4) ? STACK_I4 : STACK_I8;
4801                         } else {
4802                                 if (fsig->params [0]->type == MONO_TYPE_I4)
4803                                         opcode = OP_ATOMIC_ADD_NEW_I4;
4804                                 else if (fsig->params [0]->type == MONO_TYPE_I8)
4805                                         opcode = OP_ATOMIC_ADD_NEW_I8;
4806                                 else
4807                                         g_assert_not_reached ();
4808
4809                                 MONO_INST_NEW (cfg, ins, opcode);
4810                                 ins->dreg = mono_alloc_ireg (cfg);
4811                                 ins->inst_basereg = args [0]->dreg;
4812                                 ins->inst_offset = 0;
4813                                 ins->sreg2 = args [1]->dreg;
4814                                 ins->type = (opcode == OP_ATOMIC_ADD_NEW_I4) ? STACK_I4 : STACK_I8;
4815                         }
4816                         MONO_ADD_INS (cfg->cbb, ins);
4817                 }
4818         }
4819
4820         return ins;
4821 }
4822
4823 gboolean
4824 mono_arch_print_tree (MonoInst *tree, int arity)
4825 {
4826         return 0;
4827 }
4828
4829 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
4830 {
4831         MonoInst* ins;
4832         
4833         if (appdomain_tls_offset == -1)
4834                 return NULL;
4835         
4836         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4837         ins->inst_offset = appdomain_tls_offset;
4838         return ins;
4839 }
4840
4841 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
4842 {
4843         MonoInst* ins;
4844         
4845         if (thread_tls_offset == -1)
4846                 return NULL;
4847         
4848         MONO_INST_NEW (cfg, ins, OP_TLS_GET);
4849         ins->inst_offset = thread_tls_offset;
4850         return ins;
4851 }
4852
4853 gpointer
4854 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
4855 {
4856         /* FIXME: implement */
4857         g_assert_not_reached ();
4858 }