X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Fmini%2Fmini-ppc.c;h=a23e078f45f3315857a06f1645f633c17575d3d7;hb=f9467fdbd01a3bab5d2f2e0cf34567788303945a;hp=b31a790fec2693c22b5bd3b469f48fce2aaa48d0;hpb=9dd0a3bcbd8e76e15b16349e0bb59c0bdf78e31b;p=mono.git diff --git a/mono/mini/mini-ppc.c b/mono/mini/mini-ppc.c old mode 100644 new mode 100755 index b31a790fec2..a23e078f45f --- a/mono/mini/mini-ppc.c +++ b/mono/mini/mini-ppc.c @@ -14,14 +14,23 @@ #include #include +#include +#include #include "mini-ppc.h" +#ifdef TARGET_POWERPC64 +#include "cpu-ppc64.h" +#else #include "cpu-ppc.h" +#endif #include "trace.h" #include "ir-emit.h" #ifdef __APPLE__ #include #endif +#ifdef __linux__ +#include +#endif #define FORCE_INDIR_CALL 1 @@ -34,6 +43,22 @@ enum { TLS_MODE_DARWIN_G5 }; +/* cpu_hw_caps contains the flags defined below */ +static int cpu_hw_caps = 0; +static int cachelinesize = 0; +static int cachelineinc = 0; +enum { + PPC_ICACHE_SNOOP = 1 << 0, + PPC_MULTIPLE_LS_UNITS = 1 << 1, + PPC_SMP_CAPABLE = 1 << 2, + PPC_ISA_2X = 1 << 3, + PPC_ISA_64 = 1 << 4, + PPC_MOVE_FPR_GPR = 1 << 5, + PPC_HW_CAP_END +}; + +#define BREAKPOINT_SIZE (PPC_LOAD_SEQUENCE_LENGTH + 4) + /* This mutex protects architecture specific caches */ #define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex) #define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex) @@ -42,9 +67,17 @@ static CRITICAL_SECTION mini_arch_mutex; int mono_exc_esp_offset = 0; static int tls_mode = TLS_MODE_DETECT; static int lmf_pthread_key = -1; -static int monothread_key = -1; static int monodomain_key = -1; +/* + * The code generated for sequence points reads from this location, which is + * made read-only when single stepping is enabled. + */ +static gpointer ss_trigger_page; + +/* Enabled breakpoints read from this trigger page */ +static gpointer bp_trigger_page; + static int offsets_from_pthread_key (guint32 key, int *offset2) { @@ -57,14 +90,14 @@ offsets_from_pthread_key (guint32 key, int *offset2) #define emit_linuxthreads_tls(code,dreg,key) do {\ int off1, off2; \ off1 = offsets_from_pthread_key ((key), &off2); \ - ppc_load_reg ((code), (dreg), off1, ppc_r2); \ - ppc_load_reg ((code), (dreg), off2, (dreg)); \ + ppc_ldptr ((code), (dreg), off1, ppc_r2); \ + ppc_ldptr ((code), (dreg), off2, (dreg)); \ } while (0); #define emit_darwing5_tls(code,dreg,key) do {\ int off1 = 0x48 + key * sizeof (gpointer); \ ppc_mfspr ((code), (dreg), 104); \ - ppc_load_reg ((code), (dreg), off1, (dreg)); \ + ppc_ldptr ((code), (dreg), off1, (dreg)); \ } while (0); /* FIXME: ensure the sc call preserves all but r3 */ @@ -77,9 +110,28 @@ offsets_from_pthread_key (guint32 key, int *offset2) if ((dreg) != ppc_r3) ppc_mr ((code), ppc_r3, ppc_r11); \ } while (0); +#ifdef PPC_THREAD_PTR_REG +#define emit_nptl_tls(code,dreg,key) do { \ + int off1 = key; \ + int off2 = key >> 15; \ + if ((off2 == 0) || (off2 == -1)) { \ + ppc_ldptr ((code), (dreg), off1, PPC_THREAD_PTR_REG); \ + } else { \ + int off3 = (off2 + 1) > 1; \ + ppc_addis ((code), ppc_r11, PPC_THREAD_PTR_REG, off3); \ + ppc_ldptr ((code), (dreg), off1, ppc_r11); \ + } \ + } while (0); +#else +#define emit_nptl_tls(code,dreg,key) do { \ + g_assert_not_reached (); \ + } while (0) +#endif + #define emit_tls_access(code,dreg,key) do { \ switch (tls_mode) { \ case TLS_MODE_LTHREADS: emit_linuxthreads_tls(code,dreg,key); break; \ + case TLS_MODE_NPTL: emit_nptl_tls(code,dreg,key); break; \ case TLS_MODE_DARWIN_G5: emit_darwing5_tls(code,dreg,key); break; \ case 
TLS_MODE_DARWIN_G4: emit_darwing4_tls(code,dreg,key); break; \ default: g_assert_not_reached (); \ @@ -87,7 +139,7 @@ offsets_from_pthread_key (guint32 key, int *offset2) } while (0) #define MONO_EMIT_NEW_LOAD_R8(cfg,dr,addr) do { \ - MonoInst *inst; \ + MonoInst *inst; \ MONO_INST_NEW ((cfg), (inst), OP_R8CONST); \ inst->type = STACK_R8; \ inst->dreg = (dr); \ @@ -133,24 +185,59 @@ emit_memcpy (guint8 *code, int size, int dreg, int doffset, int sreg, int soffse { /* unrolled, use the counter in big */ if (size > sizeof (gpointer) * 5) { - int shifted = size >> 2; + long shifted = size / SIZEOF_VOID_P; guint8 *copy_loop_start, *copy_loop_jump; ppc_load (code, ppc_r0, shifted); ppc_mtctr (code, ppc_r0); g_assert (sreg == ppc_r11); - ppc_addi (code, ppc_r12, dreg, (doffset - 4)); - ppc_addi (code, ppc_r11, sreg, (soffset - 4)); + ppc_addi (code, ppc_r12, dreg, (doffset - sizeof (gpointer))); + ppc_addi (code, ppc_r11, sreg, (soffset - sizeof (gpointer))); copy_loop_start = code; - ppc_lwzu (code, ppc_r0, ppc_r11, 4); - ppc_stwu (code, ppc_r0, 4, ppc_r12); + ppc_ldptr_update (code, ppc_r0, (unsigned int)sizeof (gpointer), ppc_r11); + ppc_stptr_update (code, ppc_r0, (unsigned int)sizeof (gpointer), ppc_r12); copy_loop_jump = code; ppc_bc (code, PPC_BR_DEC_CTR_NONZERO, 0, 0); ppc_patch (copy_loop_jump, copy_loop_start); - size -= shifted * 4; + size -= shifted * sizeof (gpointer); doffset = soffset = 0; dreg = ppc_r12; } +#ifdef __mono_ppc64__ + /* the hardware has multiple load/store units and the move is long + enough to use more then one regiester, then use load/load/store/store + to execute 2 instructions per cycle. */ + if ((cpu_hw_caps & PPC_MULTIPLE_LS_UNITS) && (dreg != ppc_r12) && (sreg != ppc_r12)) { + while (size >= 16) { + ppc_ldptr (code, ppc_r0, soffset, sreg); + ppc_ldptr (code, ppc_r12, soffset+8, sreg); + ppc_stptr (code, ppc_r0, doffset, dreg); + ppc_stptr (code, ppc_r12, doffset+8, dreg); + size -= 16; + soffset += 16; + doffset += 16; + } + } + while (size >= 8) { + ppc_ldr (code, ppc_r0, soffset, sreg); + ppc_str (code, ppc_r0, doffset, dreg); + size -= 8; + soffset += 8; + doffset += 8; + } +#else + if ((cpu_hw_caps & PPC_MULTIPLE_LS_UNITS) && (dreg != ppc_r12) && (sreg != ppc_r12)) { + while (size >= 8) { + ppc_lwz (code, ppc_r0, soffset, sreg); + ppc_lwz (code, ppc_r12, soffset+4, sreg); + ppc_stw (code, ppc_r0, doffset, dreg); + ppc_stw (code, ppc_r12, doffset+4, dreg); + size -= 8; + soffset += 8; + doffset += 8; + } + } +#endif while (size >= 4) { ppc_lwz (code, ppc_r0, soffset, sreg); ppc_stw (code, ppc_r0, doffset, dreg); @@ -189,6 +276,10 @@ emit_memcpy (guint8 *code, int size, int dreg, int doffset, int sreg, int soffse int mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info) { +#ifdef __mono_ppc64__ + NOT_IMPLEMENTED; + return -1; +#else int k, frame_size = 0; int size, align, pad; int offset = 8; @@ -232,26 +323,63 @@ mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJit arg_info [k].pad = pad; return frame_size; +#endif +} + +#ifdef __mono_ppc64__ +static gboolean +is_load_sequence (guint32 *seq) +{ + return ppc_opcode (seq [0]) == 15 && /* lis */ + ppc_opcode (seq [1]) == 24 && /* ori */ + ppc_opcode (seq [2]) == 30 && /* sldi */ + ppc_opcode (seq [3]) == 25 && /* oris */ + ppc_opcode (seq [4]) == 24; /* ori */ } +#define ppc_load_get_dest(l) (((l)>>21) & 0x1f) +#define ppc_load_get_off(l) ((gint16)((l) & 0xffff)) +#endif + /* code must point to the blrl */ gboolean 
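/*
 * Editor's note -- illustrative sketch, not part of the patch: how a 64-bit
 * constant decomposes into the 16-bit pieces that the lis/ori/sldi/oris/ori
 * sequence recognised by is_load_sequence() above rebuilds at run time.
 * All names below are made up for the example.
 */
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

int main (void)
{
	uint64_t target = 0x123456789abcdef0ULL;
	uint16_t hh = target >> 48;          /* lis  rD, hh     */
	uint16_t hl = target >> 32;          /* ori  rD, rD, hl */
	                                     /* sldi rD, rD, 32 */
	uint16_t lh = target >> 16;          /* oris rD, rD, lh */
	uint16_t ll = target;                /* ori  rD, rD, ll */

	uint64_t rebuilt = ((((uint64_t)hh << 16) | hl) << 32) | ((uint32_t)lh << 16) | ll;
	assert (rebuilt == target);
	printf ("%04x %04x %04x %04x -> %016llx\n", hh, hl, lh, ll,
	        (unsigned long long) rebuilt);
	return 0;
}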
mono_ppc_is_direct_call_sequence (guint32 *code) { +#ifdef __mono_ppc64__ + g_assert(*code == 0x4e800021 || *code == 0x4e800020 || *code == 0x4e800420); + + /* the thunk-less direct call sequence: lis/ori/sldi/oris/ori/mtlr/blrl */ + if (ppc_opcode (code [-1]) == 31) { /* mtlr */ + if (ppc_opcode (code [-2]) == 58 && ppc_opcode (code [-3]) == 58) { /* ld/ld */ + if (!is_load_sequence (&code [-8])) + return FALSE; + /* one of the loads must be "ld r2,8(rX)" */ + return (ppc_load_get_dest (code [-2]) == ppc_r2 && ppc_load_get_off (code [-2]) == 8) || + (ppc_load_get_dest (code [-3]) == ppc_r2 && ppc_load_get_off (code [-3]) == 8); + } + if (ppc_opcode (code [-2]) == 24 && ppc_opcode (code [-3]) == 31) /* mr/nop */ + return is_load_sequence (&code [-8]); + else + return is_load_sequence (&code [-6]); + } + return FALSE; +#else g_assert(*code == 0x4e800021); /* the thunk-less direct call sequence: lis/ori/mtlr/blrl */ return ppc_opcode (code [-1]) == 31 && ppc_opcode (code [-2]) == 24 && ppc_opcode (code [-3]) == 15; +#endif } gpointer -mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement) +mono_arch_get_vcall_slot (guint8 *code_ptr, mgreg_t *regs, int *displacement) { char *o = NULL; int reg, offset = 0; guint32* code = (guint32*)code_ptr; + mgreg_t *r = (mgreg_t*)regs; *displacement = 0; @@ -262,11 +390,10 @@ mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement) if (*code != 0x4e800021) return NULL; - /* the thunk-less direct call sequence: lis/ori/mtlr/blrl */ - if ((code [-1] >> 26) == 31 && (code [-2] >> 26) == 24 && (code [-3] >> 26) == 15) { + if (mono_ppc_is_direct_call_sequence (code)) return NULL; - } + /* FIXME: more sanity checks here */ /* OK, we're now at the 'blrl' instruction. Now walk backwards till we get to a 'mtlr rA' */ for (; --code;) { @@ -285,13 +412,7 @@ mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement) reg = (*code >> 16) & 0x1f; g_assert (reg != ppc_r1); /*g_print ("patching reg is %d\n", reg);*/ - if (reg >= 13) { - MonoLMF *lmf = (MonoLMF*)((char*)regs + (14 * sizeof (double)) + (13 * sizeof (gulong))); - /* saved in the MonoLMF structure */ - o = (gpointer)lmf->iregs [reg - 13]; - } else { - o = regs [reg]; - } + o = (gpointer)(gsize)r [reg]; break; } } @@ -299,18 +420,83 @@ mono_arch_get_vcall_slot (guint8 *code_ptr, gpointer *regs, int *displacement) return o; } -gpointer* -mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs) +#define MAX_ARCH_DELEGATE_PARAMS 7 + +static gpointer +get_delegate_invoke_impl (gboolean has_target, guint32 param_count, guint32 *code_len, gboolean aot) { - gpointer vt; - int displacement; - vt = mono_arch_get_vcall_slot (code, regs, &displacement); - if (!vt) - return NULL; - return (gpointer*)((char*)vt + displacement); + guint8 *code, *start; + + if (has_target) { + int size = MONO_PPC_32_64_CASE (32, 32) + PPC_FTNPTR_SIZE; + + start = code = mono_global_codeman_reserve (size); + if (!aot) + code = mono_ppc_create_pre_code_ftnptr (code); + + /* Replace the this argument with the target */ + ppc_ldptr (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3); +#ifdef PPC_USES_FUNCTION_DESCRIPTOR + /* it's a function descriptor */ + /* Can't use ldptr as it doesn't work with r0 */ + ppc_ldptr_indexed (code, ppc_r0, 0, ppc_r0); +#endif + ppc_mtctr (code, ppc_r0); + ppc_ldptr (code, ppc_r3, G_STRUCT_OFFSET (MonoDelegate, target), ppc_r3); + ppc_bcctr (code, PPC_BR_ALWAYS, 0); + + g_assert ((code - start) <= size); + + mono_arch_flush_icache 
(start, size); + } else { + int size, i; + + size = MONO_PPC_32_64_CASE (32, 32) + param_count * 4 + PPC_FTNPTR_SIZE; + start = code = mono_global_codeman_reserve (size); + if (!aot) + code = mono_ppc_create_pre_code_ftnptr (code); + + ppc_ldptr (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3); +#ifdef PPC_USES_FUNCTION_DESCRIPTOR + /* it's a function descriptor */ + ppc_ldptr_indexed (code, ppc_r0, 0, ppc_r0); +#endif + ppc_mtctr (code, ppc_r0); + /* slide down the arguments */ + for (i = 0; i < param_count; ++i) { + ppc_mr (code, (ppc_r3 + i), (ppc_r3 + i + 1)); + } + ppc_bcctr (code, PPC_BR_ALWAYS, 0); + + g_assert ((code - start) <= size); + + mono_arch_flush_icache (start, size); + } + + if (code_len) + *code_len = code - start; + + return start; } -#define MAX_ARCH_DELEGATE_PARAMS 7 +GSList* +mono_arch_get_delegate_invoke_impls (void) +{ + GSList *res = NULL; + guint8 *code; + guint32 code_len; + int i; + + code = get_delegate_invoke_impl (TRUE, 0, &code_len, TRUE); + res = g_slist_prepend (res, mono_aot_tramp_info_create (g_strdup ("delegate_invoke_impl_has_target"), code, code_len)); + + for (i = 0; i < MAX_ARCH_DELEGATE_PARAMS; ++i) { + code = get_delegate_invoke_impl (FALSE, i, &code_len, TRUE); + res = g_slist_prepend (res, mono_aot_tramp_info_create (g_strdup_printf ("delegate_invoke_impl_target_%d", i), code, code_len)); + } + + return res; +} gpointer mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target) @@ -323,29 +509,21 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe if (has_target) { static guint8* cached = NULL; - mono_mini_arch_lock (); - if (cached) { - mono_mini_arch_unlock (); + + if (cached) return cached; - } - - start = code = mono_global_codeman_reserve (16); - /* Replace the this argument with the target */ - ppc_lwz (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3); - ppc_mtctr (code, ppc_r0); - ppc_lwz (code, ppc_r3, G_STRUCT_OFFSET (MonoDelegate, target), ppc_r3); - ppc_bcctr (code, PPC_BR_ALWAYS, 0); + if (mono_aot_only) + start = mono_aot_get_named_code ("delegate_invoke_impl_has_target"); + else + start = get_delegate_invoke_impl (TRUE, 0, NULL, FALSE); - g_assert ((code - start) <= 16); + mono_memory_barrier (); - mono_arch_flush_icache (start, 16); cached = start; - mono_mini_arch_unlock (); - return cached; } else { static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL}; - int size, i; + int i; if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS) return NULL; @@ -353,49 +531,151 @@ mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_targe if (!mono_is_regsize_var (sig->params [i])) return NULL; - mono_mini_arch_lock (); + code = cache [sig->param_count]; - if (code) { - mono_mini_arch_unlock (); + if (code) return code; - } - - size = 12 + sig->param_count * 4; - start = code = mono_global_codeman_reserve (size); - ppc_lwz (code, ppc_r0, G_STRUCT_OFFSET (MonoDelegate, method_ptr), ppc_r3); - ppc_mtctr (code, ppc_r0); - /* slide down the arguments */ - for (i = 0; i < sig->param_count; ++i) { - ppc_mr (code, (ppc_r3 + i), (ppc_r3 + i + 1)); + if (mono_aot_only) { + char *name = g_strdup_printf ("delegate_invoke_impl_target_%d", sig->param_count); + start = mono_aot_get_named_code (name); + g_free (name); + } else { + start = get_delegate_invoke_impl (FALSE, sig->param_count, NULL, FALSE); } - ppc_bcctr (code, PPC_BR_ALWAYS, 0); - g_assert ((code - start) <= size); + mono_memory_barrier (); - mono_arch_flush_icache (start, size); 
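/*
 * Editor's note -- illustrative sketch, not part of the patch: the C-level
 * effect of the has-target invoke thunk emitted by get_delegate_invoke_impl()
 * above: load method_ptr, replace the delegate argument with the delegate's
 * target, and call through.  The struct layout and names are simplified toys,
 * not MonoDelegate.
 */
#include <stdio.h>

typedef struct {
	void *method_ptr;  /* code to call (the thunk moves this into CTR)        */
	void *target;      /* object that replaces the delegate as first argument */
} ToyDelegate;

typedef int (*ToyMethod) (void *self, int arg);

static int invoke_has_target (ToyDelegate *d, int arg)
{
	/* same shuffle as the thunk: 'd' (r3) is replaced by d->target */
	return ((ToyMethod) d->method_ptr) (d->target, arg);
}

static int add_to_field (void *self, int arg)
{
	return *(int *) self + arg;
}

int main (void)
{
	int field = 40;
	ToyDelegate d = { (void *) add_to_field, &field };
	printf ("%d\n", invoke_has_target (&d, 2));  /* prints 42 */
	return 0;
}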
cache [sig->param_count] = start; - mono_mini_arch_unlock (); - return start; } - return NULL; + return start; } gpointer -mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gssize *regs, guint8 *code) +mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, mgreg_t *regs, guint8 *code) { + mgreg_t *r = (mgreg_t*)regs; + /* FIXME: handle returning a struct */ if (MONO_TYPE_ISSTRUCT (sig->ret)) - return (gpointer)regs [ppc_r4]; - return (gpointer)regs [ppc_r3]; + return (gpointer)(gsize)r [ppc_r4]; + return (gpointer)(gsize)r [ppc_r3]; +} + +typedef struct { + long int type; + long int value; +} AuxVec; + +#ifdef USE_ENVIRON_HACK +static AuxVec* +linux_find_auxv (int *count) +{ + AuxVec *vec; + int c = 0; + char **result = __environ; + /* Scan over the env vector looking for the ending NULL */ + for (; *result != NULL; ++result) { + } + /* Bump the pointer one more step, which should be the auxv. */ + ++result; + vec = (AuxVec *)result; + if (vec->type != 22 /*AT_IGNOREPPC*/) { + *count = 0; + return NULL; + } + while (vec->type != 0 /*AT_NULL*/) { + vec++; + c++; + } + *count = c; + return (AuxVec *)result; } +#endif + +#define MAX_AUX_ENTRIES 128 +/* + * PPC_FEATURE_POWER4, PPC_FEATURE_POWER5, PPC_FEATURE_POWER5_PLUS, PPC_FEATURE_CELL, + * PPC_FEATURE_PA6T, PPC_FEATURE_ARCH_2_05 are considered supporting 2X ISA features + */ +#define ISA_2X (0x00080000 | 0x00040000 | 0x00020000 | 0x00010000 | 0x00000800 | 0x00001000) +/* define PPC_FEATURE_64 HWCAP for 64-bit category. */ +#define ISA_64 0x40000000 + +/* define PPC_FEATURE_POWER6_EXT HWCAP for power6x mffgpr/mftgpr instructions. */ +#define ISA_MOVE_FPR_GPR 0x00000200 /* * Initialize the cpu to execute managed code. */ void mono_arch_cpu_init (void) { +#ifdef __APPLE__ + int mib [3]; + size_t len; + mib [0] = CTL_HW; + mib [1] = HW_CACHELINE; + len = sizeof (cachelinesize); + if (sysctl (mib, 2, &cachelinesize, (size_t*)&len, NULL, 0) == -1) { + perror ("sysctl"); + cachelinesize = 128; + } else { + cachelineinc = cachelinesize; + } +#elif defined(__linux__) + AuxVec vec [MAX_AUX_ENTRIES]; + int i, vec_entries = 0; + /* sadly this will work only with 2.6 kernels... 
*/ + FILE* f = fopen ("/proc/self/auxv", "rb"); + if (f) { + vec_entries = fread (&vec, sizeof (AuxVec), MAX_AUX_ENTRIES, f); + fclose (f); +#ifdef USE_ENVIRON_HACK + } else { + AuxVec *evec = linux_find_auxv (&vec_entries); + if (vec_entries) + memcpy (&vec, evec, sizeof (AuxVec) * MIN (vec_entries, MAX_AUX_ENTRIES)); +#endif + } + for (i = 0; i < vec_entries; i++) { + int type = vec [i].type; + if (type == 19) { /* AT_DCACHEBSIZE */ + cachelinesize = vec [i].value; + continue; + } else if (type == 16) { /* AT_HWCAP */ + if (vec [i].value & 0x00002000 /*PPC_FEATURE_ICACHE_SNOOP*/) + cpu_hw_caps |= PPC_ICACHE_SNOOP; + if (vec [i].value & ISA_2X) + cpu_hw_caps |= PPC_ISA_2X; + if (vec [i].value & ISA_64) + cpu_hw_caps |= PPC_ISA_64; + if (vec [i].value & ISA_MOVE_FPR_GPR) + cpu_hw_caps |= PPC_MOVE_FPR_GPR; + continue; + } else if (type == 15) { /* AT_PLATFORM */ + const char *arch = (char*)vec [i].value; + if (strcmp (arch, "ppc970") == 0 || + (strncmp (arch, "power", 5) == 0 && arch [5] >= '4' && arch [5] <= '7')) + cpu_hw_caps |= PPC_MULTIPLE_LS_UNITS; + /*printf ("cpu: %s\n", (char*)vec [i].value);*/ + continue; + } + } +#elif defined(G_COMPILER_CODEWARRIOR) + cachelinesize = 32; + cachelineinc = 32; +#elif defined(MONO_CROSS_COMPILE) +#else +//#error Need a way to get cache line size +#endif + if (!cachelinesize) + cachelinesize = 32; + if (!cachelineinc) + cachelineinc = cachelinesize; + + if (mono_cpu_count () > 1) + cpu_hw_caps |= PPC_SMP_CAPABLE; } /* @@ -404,7 +684,11 @@ mono_arch_cpu_init (void) void mono_arch_init (void) { - InitializeCriticalSection (&mini_arch_mutex); + InitializeCriticalSection (&mini_arch_mutex); + + ss_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT); + bp_trigger_page = mono_valloc (NULL, mono_pagesize (), MONO_MMAP_READ|MONO_MMAP_32BIT); + mono_mprotect (bp_trigger_page, mono_pagesize (), 0); } /* @@ -429,6 +713,14 @@ mono_arch_cpu_optimizazions (guint32 *exclude_mask) return opts; } +#ifdef __mono_ppc64__ +#define CASE_PPC32(c) +#define CASE_PPC64(c) case c: +#else +#define CASE_PPC32(c) case c: +#define CASE_PPC64(c) +#endif + static gboolean is_regsize_var (MonoType *t) { if (t->byref) @@ -437,6 +729,8 @@ is_regsize_var (MonoType *t) { switch (t->type) { case MONO_TYPE_I4: case MONO_TYPE_U4: + CASE_PPC64 (MONO_TYPE_I8) + CASE_PPC64 (MONO_TYPE_U8) case MONO_TYPE_I: case MONO_TYPE_U: case MONO_TYPE_PTR: @@ -494,8 +788,14 @@ mono_arch_get_global_int_regs (MonoCompile *cfg) if (cfg->frame_reg != ppc_sp) top = 31; /* ppc_r13 is used by the system on PPC EABI */ - for (i = 14; i < top; ++i) - regs = g_list_prepend (regs, GUINT_TO_POINTER (i)); + for (i = 14; i < top; ++i) { + /* + * Reserve r29 for holding the vtable address for virtual calls in AOT mode, + * since the trampolines can clobber r11. 
+ */ + if (!(cfg->compile_aot && i == 29)) + regs = g_list_prepend (regs, GUINT_TO_POINTER (i)); + } return regs; } @@ -514,62 +814,20 @@ mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv) return 2; } -typedef struct { - long int type; - long int value; -} AuxVec; - void mono_arch_flush_icache (guint8 *code, gint size) { +#ifdef MONO_CROSS_COMPILE +#else register guint8 *p; guint8 *endp, *start; - static int cachelinesize = 0; - static int cachelineinc = 16; - if (!cachelinesize) { -#ifdef __APPLE__ - int mib [3]; - size_t len; - mib [0] = CTL_HW; - mib [1] = HW_CACHELINE; - len = sizeof (cachelinesize); - if (sysctl(mib, 2, &cachelinesize, (size_t*)&len, NULL, 0) == -1) { - perror ("sysctl"); - cachelinesize = 128; - } else { - cachelineinc = cachelinesize; - /*g_print ("setting cl size to %d\n", cachelinesize);*/ - } -#elif defined(__linux__) - /* sadly this will work only with 2.6 kernels... */ - FILE* f = fopen ("/proc/self/auxv", "rb"); - if (f) { - AuxVec vec; - while (fread (&vec, sizeof (vec), 1, f) == 1) { - if (vec.type == 19) { - cachelinesize = vec.value; - break; - } - } - fclose (f); - } - if (!cachelinesize) - cachelinesize = 128; -#elif defined(G_COMPILER_CODEWARRIOR) - cachelinesize = 32; - cachelineinc = 32; -#else -#warning Need a way to get cache line size - cachelinesize = 128; -#endif - } p = start = code; endp = p + size; start = (guint8*)((gsize)start & ~(cachelinesize - 1)); /* use dcbf for smp support, later optimize for UP, see pem._64bit.d20030611.pdf page 211 */ #if defined(G_COMPILER_CODEWARRIOR) - if (1) { + if (cpu_hw_caps & PPC_SMP_CAPABLE) { for (p = start; p < endp; p += cachelineinc) { asm { dcbf 0, p }; } @@ -591,7 +849,19 @@ mono_arch_flush_icache (guint8 *code, gint size) isync } #else - if (1) { + /* For POWER5/6 with ICACHE_SNOOPing only one icbi in the range is required. + * The sync is required to insure that the store queue is completely empty. + * While the icbi performs no cache operations, icbi/isync is required to + * kill local prefetch. + */ + if (cpu_hw_caps & PPC_ICACHE_SNOOP) { + asm ("sync"); + asm ("icbi 0,%0;" : : "r"(code) : "memory"); + asm ("isync"); + return; + } + /* use dcbf for smp support, see pem._64bit.d20030611.pdf page 211 */ + if (cpu_hw_caps & PPC_SMP_CAPABLE) { for (p = start; p < endp; p += cachelineinc) { asm ("dcbf 0,%0;" : : "r"(p) : "memory"); } @@ -603,11 +873,21 @@ mono_arch_flush_icache (guint8 *code, gint size) asm ("sync"); p = code; for (p = start; p < endp; p += cachelineinc) { - asm ("icbi 0,%0; sync;" : : "r"(p) : "memory"); + /* for ISA2.0+ implementations we should not need any extra sync between the + * icbi instructions. Both the 2.0 PEM and the PowerISA-2.05 say this. + * So I am not sure which chip had this problem but its not an issue on + * of the ISA V2 chips. 
+ */ + if (cpu_hw_caps & PPC_ISA_2X) + asm ("icbi 0,%0;" : : "r"(p) : "memory"); + else + asm ("icbi 0,%0; sync;" : : "r"(p) : "memory"); } - asm ("sync"); + if (!(cpu_hw_caps & PPC_ISA_2X)) + asm ("sync"); asm ("isync"); #endif +#endif } void @@ -619,8 +899,13 @@ mono_arch_flush_register_windows (void) #define ALWAYS_ON_STACK(s) s #define FP_ALSO_IN_REG(s) s #else +#ifdef __mono_ppc64__ +#define ALWAYS_ON_STACK(s) s +#define FP_ALSO_IN_REG(s) s +#else #define ALWAYS_ON_STACK(s) #define FP_ALSO_IN_REG(s) +#endif #define ALIGN_DOUBLES #endif @@ -636,8 +921,12 @@ typedef struct { gint32 offset; guint32 vtsize; /* in param area */ guint8 reg; + guint8 vtregs; /* number of registers used to pass a RegTypeStructByVal */ guint8 regtype : 4; /* 0 general, 1 basereg, 2 floating point register, see RegType* */ guint8 size : 4; /* 1, 2, 4, 8, or regs used by RegTypeStructByVal */ + guint8 bytes : 4; /* size in bytes - only valid for + RegTypeStructByVal if the struct fits + in one word, otherwise it's 0*/ } ArgInfo; typedef struct { @@ -654,14 +943,18 @@ typedef struct { static void inline add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple) { +#ifdef __mono_ppc64__ + g_assert (simple); +#endif + if (simple) { if (*gr >= 3 + PPC_NUM_REG_ARGS) { ainfo->offset = PPC_STACK_PARAM_OFFSET + *stack_size; ainfo->reg = ppc_sp; /* in the caller */ ainfo->regtype = RegTypeBase; - *stack_size += 4; + *stack_size += sizeof (gpointer); } else { - ALWAYS_ON_STACK (*stack_size += 4); + ALWAYS_ON_STACK (*stack_size += sizeof (gpointer)); ainfo->reg = *gr; } } else { @@ -686,7 +979,7 @@ add_general (guint *gr, guint *stack_size, ArgInfo *ainfo, gboolean simple) (*gr) ++; } -#if __APPLE__ +#if defined(__APPLE__) || defined(__mono_ppc64__) static gboolean has_only_a_r48_field (MonoClass *klass) { @@ -713,7 +1006,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke) { guint i, fr, gr; int n = sig->hasthis + sig->param_count; - guint32 simpletype; + MonoType *simpletype; guint32 stack_size = 0; CallInfo *cinfo = g_malloc0 (sizeof (CallInfo) + sizeof (ArgInfo) * n); @@ -748,8 +1041,8 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke) n++; continue; } - simpletype = mini_type_get_underlying_type (NULL, sig->params [i])->type; - switch (simpletype) { + simpletype = mini_type_get_underlying_type (NULL, sig->params [i]); + switch (simpletype->type) { case MONO_TYPE_BOOLEAN: case MONO_TYPE_I1: case MONO_TYPE_U1: @@ -784,22 +1077,27 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke) n++; break; case MONO_TYPE_GENERICINST: - if (!mono_type_generic_inst_is_valuetype (sig->params [i])) { + if (!mono_type_generic_inst_is_valuetype (simpletype)) { cinfo->args [n].size = sizeof (gpointer); add_general (&gr, &stack_size, cinfo->args + n, TRUE); n++; break; } /* Fall through */ - case MONO_TYPE_VALUETYPE: { + case MONO_TYPE_VALUETYPE: + case MONO_TYPE_TYPEDBYREF: { gint size; MonoClass *klass; + klass = mono_class_from_mono_type (sig->params [i]); - if (is_pinvoke) + if (simpletype->type == MONO_TYPE_TYPEDBYREF) + size = sizeof (MonoTypedRef); + else if (is_pinvoke) size = mono_class_native_size (klass, NULL); else size = mono_class_value_size (klass, NULL); -#if __APPLE__ + +#if defined(__APPLE__) || defined(__mono_ppc64__) if ((size == 4 || size == 8) && has_only_a_r48_field (klass)) { cinfo->args [n].size = size; @@ -824,59 +1122,41 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke) #endif DEBUG(printf ("load %d bytes struct\n", 
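/*
 * Editor's note -- illustrative sketch, not part of the patch: the address
 * arithmetic behind the dcbf/icbi loops in mono_arch_flush_icache() above --
 * round the start down to a cache-line boundary and touch every line covering
 * [code, code + size).  The function and variable names are made up; the line
 * size is assumed to have been detected as in mono_arch_cpu_init().
 */
#include <stdint.h>
#include <stdio.h>

static void walk_cache_lines (void *code, long size, long linesize)
{
	uintptr_t p   = (uintptr_t) code & ~(uintptr_t) (linesize - 1);
	uintptr_t end = (uintptr_t) code + size;

	for (; p < end; p += linesize) {
		/* the real code issues "dcbf 0,p" here, then sync, then "icbi 0,p" */
		printf ("line at %#lx\n", (unsigned long) p);
	}
	/* On GCC/Clang a portable alternative is
	 *   __builtin___clear_cache ((char *) code, (char *) code + size);
	 * which emits the appropriate per-target sequence. */
}

int main (void)
{
	char buf [256];
	walk_cache_lines (buf + 3, 100, 32);
	return 0;
}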
mono_class_native_size (sig->params [i]->data.klass, NULL))); + #if PPC_PASS_STRUCTS_BY_VALUE { int align_size = size; - int nwords = 0; + int nregs = 0; int rest = PPC_LAST_ARG_REG - gr + 1; int n_in_regs; + align_size += (sizeof (gpointer) - 1); align_size &= ~(sizeof (gpointer) - 1); - nwords = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer); - n_in_regs = MIN (rest, nwords); - cinfo->args [n].regtype = RegTypeStructByVal; - if (gr > PPC_LAST_ARG_REG || (size >= 3 && size % 4 != 0)) { - cinfo->args [n].size = 0; - cinfo->args [n].vtsize = nwords; - } else { - cinfo->args [n].size = n_in_regs; - cinfo->args [n].vtsize = nwords - n_in_regs; - cinfo->args [n].reg = gr; - } - gr += n_in_regs; - cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size; - /*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/ - stack_size += nwords * sizeof (gpointer); - } -#else - add_general (&gr, &stack_size, cinfo->args + n, TRUE); - cinfo->args [n].regtype = RegTypeStructByAddr; - cinfo->args [n].vtsize = size; + nregs = (align_size + sizeof (gpointer) -1 ) / sizeof (gpointer); + n_in_regs = MIN (rest, nregs); + if (n_in_regs < 0) + n_in_regs = 0; +#ifdef __APPLE__ + /* FIXME: check this */ + if (size >= 3 && size % 4 != 0) + n_in_regs = 0; #endif - n++; - break; - } - case MONO_TYPE_TYPEDBYREF: { - int size = sizeof (MonoTypedRef); - /* keep in sync or merge with the valuetype case */ -#if PPC_PASS_STRUCTS_BY_VALUE - { - int nwords = (size + sizeof (gpointer) -1 ) / sizeof (gpointer); cinfo->args [n].regtype = RegTypeStructByVal; - if (gr <= PPC_LAST_ARG_REG) { - int rest = PPC_LAST_ARG_REG - gr + 1; - int n_in_regs = rest >= nwords? nwords: rest; - cinfo->args [n].size = n_in_regs; - cinfo->args [n].vtsize = nwords - n_in_regs; - cinfo->args [n].reg = gr; - gr += n_in_regs; - } else { - cinfo->args [n].size = 0; - cinfo->args [n].vtsize = nwords; - } + cinfo->args [n].vtregs = n_in_regs; + cinfo->args [n].size = n_in_regs; + cinfo->args [n].vtsize = nregs - n_in_regs; + cinfo->args [n].reg = gr; + +#ifdef __mono_ppc64__ + if (nregs == 1 && is_pinvoke) + cinfo->args [n].bytes = size; + else +#endif + cinfo->args [n].bytes = 0; + gr += n_in_regs; cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size; /*g_print ("offset for arg %d at %d\n", n, PPC_STACK_PARAM_OFFSET + stack_size);*/ - stack_size += nwords * sizeof (gpointer); + stack_size += nregs * sizeof (gpointer); } #else add_general (&gr, &stack_size, cinfo->args + n, TRUE); @@ -889,7 +1169,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke) case MONO_TYPE_U8: case MONO_TYPE_I8: cinfo->args [n].size = 8; - add_general (&gr, &stack_size, cinfo->args + n, sizeof (gpointer) == 8); + add_general (&gr, &stack_size, cinfo->args + n, SIZEOF_REGISTER == 8); n++; break; case MONO_TYPE_R4: @@ -901,12 +1181,12 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke) cinfo->args [n].reg = fr; fr ++; FP_ALSO_IN_REG (gr ++); - ALWAYS_ON_STACK (stack_size += 4); + ALWAYS_ON_STACK (stack_size += SIZEOF_REGISTER); } else { - cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size; + cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size + MONO_PPC_32_64_CASE (0, 4); cinfo->args [n].regtype = RegTypeBase; cinfo->args [n].reg = ppc_sp; /* in the caller*/ - stack_size += 4; + stack_size += SIZEOF_REGISTER; } n++; break; @@ -917,7 +1197,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke) cinfo->args [n].regtype = RegTypeFP; cinfo->args [n].reg = fr; fr ++; - 
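/*
 * Editor's note -- illustrative sketch, not part of the patch: the
 * RegTypeStructByVal arithmetic used above in stand-alone form.  WORD,
 * NUM_ARG_REGS and the variable names are illustrative, not the real macros.
 */
#include <stdio.h>

#define WORD          8   /* sizeof (gpointer) on ppc64; 4 on 32-bit */
#define NUM_ARG_REGS  8   /* r3..r10 */

static void split_struct (int size, int regs_used, int *in_regs, int *on_stack)
{
	int nregs = (size + WORD - 1) / WORD;    /* words after alignment   */
	int rest  = NUM_ARG_REGS - regs_used;    /* argument registers left */

	*in_regs  = rest > 0 ? (rest < nregs ? rest : nregs) : 0;
	*on_stack = nregs - *in_regs;            /* corresponds to vtsize   */
}

int main (void)
{
	int in_regs, on_stack;
	split_struct (20, 6, &in_regs, &on_stack);  /* 20-byte struct, 6 arg regs already used */
	printf ("%d words in registers, %d on the stack\n", in_regs, on_stack);
	return 0;
}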
FP_ALSO_IN_REG (gr += 2); + FP_ALSO_IN_REG (gr += sizeof (double) / SIZEOF_REGISTER); ALWAYS_ON_STACK (stack_size += 8); } else { cinfo->args [n].offset = PPC_STACK_PARAM_OFFSET + stack_size; @@ -941,8 +1221,8 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke) } { - simpletype = mini_type_get_underlying_type (NULL, sig->ret)->type; - switch (simpletype) { + simpletype = mini_type_get_underlying_type (NULL, sig->ret); + switch (simpletype->type) { case MONO_TYPE_BOOLEAN: case MONO_TYPE_I1: case MONO_TYPE_U1: @@ -972,7 +1252,7 @@ calculate_sizes (MonoMethodSignature *sig, gboolean is_pinvoke) cinfo->ret.regtype = RegTypeFP; break; case MONO_TYPE_GENERICINST: - if (!mono_type_generic_inst_is_valuetype (sig->ret)) { + if (!mono_type_generic_inst_is_valuetype (simpletype)) { cinfo->ret.reg = ppc_r3; break; } @@ -1058,7 +1338,7 @@ mono_arch_allocate_vars (MonoCompile *m) if (m->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) m->param_area = MAX (m->param_area, sizeof (gpointer)*8); - header = mono_method_get_header (m->method); + header = m->header; /* * We use the frame register also for any method that has @@ -1193,6 +1473,8 @@ mono_arch_allocate_vars (MonoCompile *m) } else { size = mono_type_size (sig->params [i], &align); } + if (MONO_TYPE_ISSTRUCT (sig->params [i]) && size < sizeof (gpointer)) + size = align = sizeof (gpointer); offset += align - 1; offset &= ~(align - 1); inst->inst_offset = offset; @@ -1242,6 +1524,9 @@ emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo) { int sig_reg = mono_alloc_ireg (cfg); + /* FIXME: Add support for signature tokens to AOT */ + cfg->disable_aot = TRUE; + MONO_EMIT_NEW_ICONST (cfg, sig_reg, (gulong)call->signature); MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, ppc_r1, cinfo->sig_cookie.offset, sig_reg); @@ -1276,6 +1561,7 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call) in = call->args [i]; if (ainfo->regtype == RegTypeGeneral) { +#ifndef __mono_ppc64__ if (!t->byref && ((t->type == MONO_TYPE_I8) || (t->type == MONO_TYPE_U8))) { MONO_INST_NEW (cfg, ins, OP_MOVE); ins->dreg = mono_alloc_ireg (cfg); @@ -1288,7 +1574,9 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call) ins->sreg1 = in->dreg + 2; MONO_ADD_INS (cfg->cbb, ins); mono_call_inst_add_outarg_reg (cfg, call, ins->dreg, ainfo->reg, FALSE); - } else { + } else +#endif + { MONO_INST_NEW (cfg, ins, OP_MOVE); ins->dreg = mono_alloc_ireg (cfg); ins->sreg1 = in->dreg; @@ -1376,7 +1664,7 @@ mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call) } call->stack_usage = cinfo->stack_usage; - cfg->param_area = MAX (cfg->param_area, cinfo->stack_usage); + cfg->param_area = MAX (PPC_MINIMAL_PARAM_AREA_SIZE, MAX (cfg->param_area, cinfo->stack_usage)); cfg->flags |= MONO_CFG_HAS_CALLS; g_free (cinfo); @@ -1392,7 +1680,9 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src) int i, soffset, dreg; if (ainfo->regtype == RegTypeStructByVal) { +#ifdef __APPLE__ guint32 size = 0; +#endif soffset = 0; #ifdef __APPLE__ /* @@ -1413,9 +1703,16 @@ mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src) mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg, FALSE); } else #endif - for (i = 0; i < ainfo->size; ++i) { + for (i = 0; i < ainfo->vtregs; ++i) { + int antipadding = 0; + if (ainfo->bytes) { + g_assert (i == 0); + antipadding = sizeof (gpointer) - ainfo->bytes; + } dreg = mono_alloc_ireg (cfg); MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, soffset); + if (antipadding) + 
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHR_UN_IMM, dreg, dreg, antipadding * 8); mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg + i, FALSE); soffset += sizeof (gpointer); } @@ -1462,6 +1759,7 @@ mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val) mono_method_signature (method)->ret); if (!ret->byref) { +#ifndef __mono_ppc64__ if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) { MonoInst *ins; @@ -1471,6 +1769,7 @@ mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val) MONO_ADD_INS (cfg->cbb, ins); return; } +#endif if (ret->type == MONO_TYPE_R8 || ret->type == MONO_TYPE_R4) { MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg); return; @@ -1495,7 +1794,7 @@ mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean ena { guchar *code = p; - ppc_load (code, ppc_r3, cfg->method); + ppc_load_ptr (code, ppc_r3, cfg->method); ppc_li (code, ppc_r4, 0); /* NULL ebp for now */ ppc_load_func (code, ppc_r0, func); ppc_mtlr (code, ppc_r0); @@ -1512,7 +1811,7 @@ enum { }; void* -mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments) +mono_arch_instrument_epilog_full (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments, gboolean preserve_argument_registers) { guchar *code = p; int save_mode = SAVE_NONE; @@ -1540,10 +1839,12 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena else save_mode = SAVE_NONE; break; +#ifndef __mono_ppc64__ case MONO_TYPE_I8: case MONO_TYPE_U8: save_mode = SAVE_TWO; break; +#endif case MONO_TYPE_R4: case MONO_TYPE_R8: save_mode = SAVE_FP; @@ -1566,7 +1867,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena } break; case SAVE_ONE: - ppc_store_reg (code, ppc_r3, save_offset, cfg->frame_reg); + ppc_stptr (code, ppc_r3, save_offset, cfg->frame_reg); if (enable_arguments) { ppc_mr (code, ppc_r4, ppc_r3); } @@ -1576,6 +1877,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena if (enable_arguments) { /* FIXME: what reg? 
*/ ppc_fmr (code, ppc_f3, ppc_f1); + /* FIXME: use 8 byte load on PPC64 */ ppc_lwz (code, ppc_r4, save_offset, cfg->frame_reg); ppc_lwz (code, ppc_r5, save_offset + 4, cfg->frame_reg); } @@ -1591,7 +1893,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena break; } - ppc_load (code, ppc_r3, cfg->method); + ppc_load_ptr (code, ppc_r3, cfg->method); ppc_load_func (code, ppc_r0, func); ppc_mtlr (code, ppc_r0); ppc_blrl (code); @@ -1602,7 +1904,7 @@ mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean ena ppc_lwz (code, ppc_r4, save_offset + 4, cfg->frame_reg); break; case SAVE_ONE: - ppc_load_reg (code, ppc_r3, save_offset, cfg->frame_reg); + ppc_ldptr (code, ppc_r3, save_offset, cfg->frame_reg); break; case SAVE_FP: ppc_lfd (code, ppc_f1, save_offset, cfg->frame_reg); @@ -1633,31 +1935,22 @@ typedef struct { } MonoOvfJump; #define EMIT_COND_BRANCH_FLAGS(ins,b0,b1) \ -if (ins->flags & MONO_INST_BRLABEL) { \ - if (0 && ins->inst_i0->inst_c0) { \ - ppc_bc (code, (b0), (b1), (code - cfg->native_code + ins->inst_i0->inst_c0) & 0xffff); \ - } else { \ - mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \ - ppc_bc (code, (b0), (b1), 0); \ - } \ +if (0 && ins->inst_true_bb->native_offset) { \ + ppc_bc (code, (b0), (b1), (code - cfg->native_code + ins->inst_true_bb->native_offset) & 0xffff); \ } else { \ - if (0 && ins->inst_true_bb->native_offset) { \ - ppc_bc (code, (b0), (b1), (code - cfg->native_code + ins->inst_true_bb->native_offset) & 0xffff); \ - } else { \ - int br_disp = ins->inst_true_bb->max_offset - offset; \ - if (!ppc_is_imm16 (br_disp + 1024) || ! ppc_is_imm16 (ppc_is_imm16 (br_disp - 1024))) { \ - MonoOvfJump *ovfj = mono_mempool_alloc (cfg->mempool, sizeof (MonoOvfJump)); \ - ovfj->data.bb = ins->inst_true_bb; \ - ovfj->ip_offset = 0; \ - ovfj->b0_cond = (b0); \ - ovfj->b1_cond = (b1); \ - mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB_OVF, ovfj); \ - ppc_b (code, 0); \ - } else { \ - mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \ - ppc_bc (code, (b0), (b1), 0); \ - } \ - } \ + int br_disp = ins->inst_true_bb->max_offset - offset; \ + if (!ppc_is_imm16 (br_disp + 1024) || ! 
ppc_is_imm16 (ppc_is_imm16 (br_disp - 1024))) { \ + MonoOvfJump *ovfj = mono_mempool_alloc (cfg->mempool, sizeof (MonoOvfJump)); \ + ovfj->data.bb = ins->inst_true_bb; \ + ovfj->ip_offset = 0; \ + ovfj->b0_cond = (b0); \ + ovfj->b1_cond = (b1); \ + mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB_OVF, ovfj); \ + ppc_b (code, 0); \ + } else { \ + mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \ + ppc_bc (code, (b0), (b1), 0); \ + } \ } #define EMIT_COND_BRANCH(ins,cond) EMIT_COND_BRANCH_FLAGS(ins, branch_b0_table [(cond)], branch_b1_table [(cond)]) @@ -1693,13 +1986,38 @@ mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb) { } +static int +normalize_opcode (int opcode) +{ + switch (opcode) { +#ifndef __mono_ilp32__ + case MONO_PPC_32_64_CASE (OP_LOADI4_MEMBASE, OP_LOADI8_MEMBASE): + return OP_LOAD_MEMBASE; + case MONO_PPC_32_64_CASE (OP_LOADI4_MEMINDEX, OP_LOADI8_MEMINDEX): + return OP_LOAD_MEMINDEX; + case MONO_PPC_32_64_CASE (OP_STOREI4_MEMBASE_REG, OP_STOREI8_MEMBASE_REG): + return OP_STORE_MEMBASE_REG; + case MONO_PPC_32_64_CASE (OP_STOREI4_MEMBASE_IMM, OP_STOREI8_MEMBASE_IMM): + return OP_STORE_MEMBASE_IMM; + case MONO_PPC_32_64_CASE (OP_STOREI4_MEMINDEX, OP_STOREI8_MEMINDEX): + return OP_STORE_MEMINDEX; +#endif + case MONO_PPC_32_64_CASE (OP_ISHR_IMM, OP_LSHR_IMM): + return OP_SHR_IMM; + case MONO_PPC_32_64_CASE (OP_ISHR_UN_IMM, OP_LSHR_UN_IMM): + return OP_SHR_UN_IMM; + default: + return opcode; + } +} + void mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) { MonoInst *ins, *n, *last_ins = NULL; MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) { - switch (ins->opcode) { + switch (normalize_opcode (ins->opcode)) { case OP_MUL_IMM: /* remove unnecessary multiplication with 1 */ if (ins->inst_imm == 1) { @@ -1718,13 +2036,11 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) } break; case OP_LOAD_MEMBASE: - case OP_LOADI4_MEMBASE: /* * OP_STORE_MEMBASE_REG reg, offset(basereg) * OP_LOAD_MEMBASE offset(basereg), reg */ - if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG - || last_ins->opcode == OP_STORE_MEMBASE_REG) && + if (last_ins && normalize_opcode (last_ins->opcode) == OP_STORE_MEMBASE_REG && ins->inst_basereg == last_ins->inst_destbasereg && ins->inst_offset == last_ins->inst_offset) { if (ins->dreg == last_ins->sreg1) { @@ -1744,8 +2060,7 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) * OP_LOAD_MEMBASE offset(basereg), reg1 * OP_MOVE reg1, reg2 */ - } if (last_ins && (last_ins->opcode == OP_LOADI4_MEMBASE - || last_ins->opcode == OP_LOAD_MEMBASE) && + } else if (last_ins && normalize_opcode (last_ins->opcode) == OP_LOAD_MEMBASE && ins->inst_basereg != last_ins->dreg && ins->inst_basereg == last_ins->inst_basereg && ins->inst_offset == last_ins->inst_offset) { @@ -1768,8 +2083,7 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) * OP_STORE_MEMBASE_IMM imm, offset(basereg) * OP_ICONST reg, imm */ - } else if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_IMM - || last_ins->opcode == OP_STORE_MEMBASE_IMM) && + } else if (last_ins && normalize_opcode (last_ins->opcode) == OP_STORE_MEMBASE_IMM && ins->inst_basereg == last_ins->inst_destbasereg && ins->inst_offset == last_ins->inst_offset) { //static int c = 0; printf ("MATCHX %s %d\n", cfg->method->name,c++); @@ -1797,6 +2111,17 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) ins->sreg1 = last_ins->sreg1; } break; +#ifdef __mono_ppc64__ + case OP_LOADU4_MEMBASE: 
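/*
 * Editor's note -- toy model, not mono's IR: the store-to-load forwarding
 * performed by mono_arch_peephole_pass_2() above.  A load that immediately
 * follows a store to the same [basereg + offset] becomes a register move, or
 * disappears entirely when it reloads the register that was just stored.
 */
#include <stdio.h>

typedef enum { TOY_STORE_MEMBASE_REG, TOY_LOAD_MEMBASE, TOY_MOVE, TOY_NOP } ToyOp;
typedef struct { ToyOp op; int dreg, sreg1, basereg, offset; } ToyIns;

static void forward_store_to_load (const ToyIns *prev, ToyIns *ins)
{
	if (prev->op == TOY_STORE_MEMBASE_REG && ins->op == TOY_LOAD_MEMBASE &&
	    ins->basereg == prev->basereg && ins->offset == prev->offset) {
		if (ins->dreg == prev->sreg1)
			ins->op = TOY_NOP;       /* reloads the stored register */
		else {
			ins->op = TOY_MOVE;      /* forward the stored value    */
			ins->sreg1 = prev->sreg1;
		}
	}
}

int main (void)
{
	ToyIns store = { TOY_STORE_MEMBASE_REG, 0, /*sreg1*/ 5, /*basereg*/ 1, /*offset*/ 16 };
	ToyIns load  = { TOY_LOAD_MEMBASE, /*dreg*/ 7, 0, /*basereg*/ 1, /*offset*/ 16 };
	forward_store_to_load (&store, &load);
	printf ("load became %s\n", load.op == TOY_MOVE ? "a move" : "something else");
	return 0;
}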
+ case OP_LOADI4_MEMBASE: + if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) && + ins->inst_basereg == last_ins->inst_destbasereg && + ins->inst_offset == last_ins->inst_offset) { + ins->opcode = (ins->opcode == OP_LOADI4_MEMBASE) ? OP_ICONV_TO_I4 : OP_ICONV_TO_U4; + ins->sreg1 = last_ins->sreg1; + } + break; +#endif case OP_MOVE: ins->opcode = OP_MOVE; /* @@ -1848,32 +2173,40 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins) ins->opcode = OP_NOP; break; } +#ifndef __mono_ppc64__ case OP_ICONV_TO_R4: case OP_ICONV_TO_R8: { - /* FIXME: change precision for CEE_CONV_R4 */ - static const guint64 adjust_val = 0x4330000080000000ULL; - int msw_reg = mono_alloc_ireg (cfg); - int xored = mono_alloc_ireg (cfg); - int adj_reg = mono_alloc_freg (cfg); - int tmp_reg = mono_alloc_freg (cfg); - int basereg = ppc_sp; - int offset = -8; - if (!ppc_is_imm16 (offset + 4)) { - basereg = mono_alloc_ireg (cfg); - MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset); + /* If we have a PPC_FEATURE_64 machine we can avoid + this and use the fcfid instruction. Otherwise + on an old 32-bit chip and we have to do this the + hard way. */ + if (!(cpu_hw_caps & PPC_ISA_64)) { + /* FIXME: change precision for CEE_CONV_R4 */ + static const guint64 adjust_val = 0x4330000080000000ULL; + int msw_reg = mono_alloc_ireg (cfg); + int xored = mono_alloc_ireg (cfg); + int adj_reg = mono_alloc_freg (cfg); + int tmp_reg = mono_alloc_freg (cfg); + int basereg = ppc_sp; + int offset = -8; + if (!ppc_is_imm16 (offset + 4)) { + basereg = mono_alloc_ireg (cfg); + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_IADD_IMM, basereg, cfg->frame_reg, offset); + } + MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000); + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg); + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000); + MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored); + MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val); + MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset); + MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg); + if (ins->opcode == OP_ICONV_TO_R4) + MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg); + ins->opcode = OP_NOP; } - MONO_EMIT_NEW_ICONST (cfg, msw_reg, 0x43300000); - MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset, msw_reg); - MONO_EMIT_NEW_BIALU_IMM (cfg, OP_XOR_IMM, xored, ins->sreg1, 0x80000000); - MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STOREI4_MEMBASE_REG, basereg, offset + 4, xored); - MONO_EMIT_NEW_LOAD_R8 (cfg, adj_reg, (gpointer)&adjust_val); - MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADR8_MEMBASE, tmp_reg, basereg, offset); - MONO_EMIT_NEW_BIALU (cfg, OP_FSUB, ins->dreg, tmp_reg, adj_reg); - if (ins->opcode == OP_ICONV_TO_R4) - MONO_EMIT_NEW_UNALU (cfg, OP_FCONV_TO_R4, ins->dreg, ins->dreg); - ins->opcode = OP_NOP; break; } +#endif case OP_CKFINITE: { int msw_reg = mono_alloc_ireg (cfg); int basereg = ppc_sp; @@ -1889,6 +2222,66 @@ mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins) ins->opcode = OP_NOP; break; } +#ifdef __mono_ppc64__ + case OP_IADD_OVF: + case OP_IADD_OVF_UN: + case OP_ISUB_OVF: { + int shifted1_reg = mono_alloc_ireg (cfg); + int shifted2_reg = mono_alloc_ireg (cfg); + int result_shifted_reg = mono_alloc_ireg (cfg); + + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, shifted1_reg, ins->sreg1, 32); + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, shifted2_reg, ins->sreg2, 32); + 
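/*
 * Editor's note -- worked example, not part of the patch: the bit trick used
 * by the OP_ICONV_TO_R8 decomposition above when fcfid is unavailable.  Build
 * the double 2^52 + 2^31 + x by bit manipulation, then subtract the constant
 * 0x4330000080000000 (exactly 2^52 + 2^31), leaving (double) x.  Assumes
 * IEEE-754 doubles; none of the names are from mono.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>

static double int_to_double_no_fcfid (int32_t x)
{
	uint64_t magic_bits = 0x4330000080000000ULL;
	uint64_t built_bits = 0x4330000000000000ULL | ((uint32_t) x ^ 0x80000000u);
	double magic, built;

	memcpy (&magic, &magic_bits, sizeof magic);
	memcpy (&built, &built_bits, sizeof built);
	return built - magic;
}

int main (void)
{
	int32_t tests [] = { 0, 1, -1, 123456789, -2147483647 - 1 };
	unsigned i;
	for (i = 0; i < sizeof tests / sizeof tests [0]; i++)
		assert (int_to_double_no_fcfid (tests [i]) == (double) tests [i]);
	printf ("ok\n");
	return 0;
}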
MONO_EMIT_NEW_BIALU (cfg, ins->opcode, result_shifted_reg, shifted1_reg, shifted2_reg); + if (ins->opcode == OP_IADD_OVF_UN) + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHR_UN_IMM, ins->dreg, result_shifted_reg, 32); + else + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHR_IMM, ins->dreg, result_shifted_reg, 32); + ins->opcode = OP_NOP; + } +#endif + } +} + +void +mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *ins) +{ + switch (ins->opcode) { + case OP_LADD_OVF: + /* ADC sets the condition code */ + MONO_EMIT_NEW_BIALU (cfg, OP_ADDCC, ins->dreg + 1, ins->sreg1 + 1, ins->sreg2 + 1); + MONO_EMIT_NEW_BIALU (cfg, OP_ADD_OVF_CARRY, ins->dreg + 2, ins->sreg1 + 2, ins->sreg2 + 2); + NULLIFY_INS (ins); + break; + case OP_LADD_OVF_UN: + /* ADC sets the condition code */ + MONO_EMIT_NEW_BIALU (cfg, OP_ADDCC, ins->dreg + 1, ins->sreg1 + 1, ins->sreg2 + 1); + MONO_EMIT_NEW_BIALU (cfg, OP_ADD_OVF_UN_CARRY, ins->dreg + 2, ins->sreg1 + 2, ins->sreg2 + 2); + NULLIFY_INS (ins); + break; + case OP_LSUB_OVF: + /* SBB sets the condition code */ + MONO_EMIT_NEW_BIALU (cfg, OP_SUBCC, ins->dreg + 1, ins->sreg1 + 1, ins->sreg2 + 1); + MONO_EMIT_NEW_BIALU (cfg, OP_SUB_OVF_CARRY, ins->dreg + 2, ins->sreg1 + 2, ins->sreg2 + 2); + NULLIFY_INS (ins); + break; + case OP_LSUB_OVF_UN: + /* SBB sets the condition code */ + MONO_EMIT_NEW_BIALU (cfg, OP_SUBCC, ins->dreg + 1, ins->sreg1 + 1, ins->sreg2 + 1); + MONO_EMIT_NEW_BIALU (cfg, OP_SUB_OVF_UN_CARRY, ins->dreg + 2, ins->sreg1 + 2, ins->sreg2 + 2); + NULLIFY_INS (ins); + break; + case OP_LNEG: + /* This is the old version from inssel-long32.brg */ + MONO_EMIT_NEW_UNALU (cfg, OP_INOT, ins->dreg + 1, ins->sreg1 + 1); + MONO_EMIT_NEW_UNALU (cfg, OP_INOT, ins->dreg + 2, ins->sreg1 + 2); + /* ADC sets the condition codes */ + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ADC_IMM, ins->dreg + 1, ins->dreg + 1, 1); + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_ADC_IMM, ins->dreg + 2, ins->dreg + 2, 0); + NULLIFY_INS (ins); + break; + default: + break; } } @@ -2065,26 +2458,35 @@ loop_start: /* handle rem separately */ goto loop_start; case OP_IREM: - case OP_IREM_UN: { + case OP_IREM_UN: + CASE_PPC64 (OP_LREM) + CASE_PPC64 (OP_LREM_UN) { MonoInst *mul; /* we change a rem dest, src1, src2 to * div temp1, src1, src2 * mul temp2, temp1, src2 * sub dest, src1, temp2 */ - NEW_INS (cfg, mul, OP_IMUL); - NEW_INS (cfg, temp, ins->opcode == OP_IREM? OP_IDIV: OP_IDIV_UN); + if (ins->opcode == OP_IREM || ins->opcode == OP_IREM_UN) { + NEW_INS (cfg, mul, OP_IMUL); + NEW_INS (cfg, temp, ins->opcode == OP_IREM? OP_IDIV: OP_IDIV_UN); + ins->opcode = OP_ISUB; + } else { + NEW_INS (cfg, mul, OP_LMUL); + NEW_INS (cfg, temp, ins->opcode == OP_LREM? 
OP_LDIV: OP_LDIV_UN); + ins->opcode = OP_LSUB; + } temp->sreg1 = ins->sreg1; temp->sreg2 = ins->sreg2; temp->dreg = mono_alloc_ireg (cfg); mul->sreg1 = temp->dreg; mul->sreg2 = ins->sreg2; mul->dreg = mono_alloc_ireg (cfg); - ins->opcode = OP_ISUB; ins->sreg2 = mul->dreg; break; } case OP_IADD_IMM: + CASE_PPC64 (OP_LADD_IMM) case OP_ADD_IMM: case OP_ADDCC_IMM: if (!ppc_is_imm16 (ins->inst_imm)) { @@ -2096,6 +2498,7 @@ loop_start: } break; case OP_ISUB_IMM: + CASE_PPC64 (OP_LSUB_IMM) case OP_SUB_IMM: if (!ppc_is_imm16 (-ins->inst_imm)) { NEW_INS (cfg, temp, OP_ICONST); @@ -2108,10 +2511,18 @@ loop_start: case OP_IAND_IMM: case OP_IOR_IMM: case OP_IXOR_IMM: + case OP_LAND_IMM: + case OP_LOR_IMM: + case OP_LXOR_IMM: case OP_AND_IMM: case OP_OR_IMM: - case OP_XOR_IMM: - if ((ins->inst_imm & 0xffff0000) && (ins->inst_imm & 0xffff)) { + case OP_XOR_IMM: { + gboolean is_imm = ((ins->inst_imm & 0xffff0000) && (ins->inst_imm & 0xffff)); +#ifdef __mono_ppc64__ + if (ins->inst_imm & 0xffffffff00000000ULL) + is_imm = TRUE; +#endif + if (is_imm) { NEW_INS (cfg, temp, OP_ICONST); temp->inst_c0 = ins->inst_imm; temp->dreg = mono_alloc_ireg (cfg); @@ -2119,6 +2530,7 @@ loop_start: ins->opcode = map_to_reg_reg_op (ins->opcode); } break; + } case OP_ISBB_IMM: case OP_IADC_IMM: case OP_SBB_IMM: @@ -2132,6 +2544,7 @@ loop_start: break; case OP_COMPARE_IMM: case OP_ICOMPARE_IMM: + CASE_PPC64 (OP_LCOMPARE_IMM) next = ins->next; /* Branch opts can eliminate the branch */ if (!next || (!(MONO_IS_COND_BRANCH_OP (next) || MONO_IS_COND_EXC (next) || MONO_IS_SETCC (next)))) { @@ -2191,6 +2604,7 @@ loop_start: break; case OP_LOAD_MEMBASE: case OP_LOADI4_MEMBASE: + CASE_PPC64 (OP_LOADI8_MEMBASE) case OP_LOADU4_MEMBASE: case OP_LOADI2_MEMBASE: case OP_LOADU2_MEMBASE: @@ -2199,6 +2613,7 @@ loop_start: case OP_LOADR4_MEMBASE: case OP_LOADR8_MEMBASE: case OP_STORE_MEMBASE_REG: + CASE_PPC64 (OP_STOREI8_MEMBASE_REG) case OP_STOREI4_MEMBASE_REG: case OP_STOREI2_MEMBASE_REG: case OP_STOREI1_MEMBASE_REG: @@ -2221,6 +2636,7 @@ loop_start: case OP_STOREI1_MEMBASE_IMM: case OP_STOREI2_MEMBASE_IMM: case OP_STOREI4_MEMBASE_IMM: + CASE_PPC64 (OP_STOREI8_MEMBASE_IMM) NEW_INS (cfg, temp, OP_ICONST); temp->inst_c0 = ins->inst_imm; temp->dreg = mono_alloc_ireg (cfg); @@ -2230,6 +2646,10 @@ loop_start: goto loop_start; /* make it handle the possibly big ins->inst_offset */ case OP_R8CONST: case OP_R4CONST: + if (cfg->compile_aot) { + /* Keep these in the aot case */ + break; + } NEW_INS (cfg, temp, OP_ICONST); temp->inst_c0 = (gulong)ins->inst_p0; temp->dreg = mono_alloc_ireg (cfg); @@ -2251,28 +2671,52 @@ loop_start: static guchar* emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed) { - int offset = cfg->arch.fp_conv_var_offset; + long offset = cfg->arch.fp_conv_var_offset; + long sub_offset; /* sreg is a float, dreg is an integer reg. 
ppc_f0 is used a scratch */ - ppc_fctiwz (code, ppc_f0, sreg); - if (ppc_is_imm16 (offset + 4)) { +#ifdef __mono_ppc64__ + if (size == 8) { + ppc_fctidz (code, ppc_f0, sreg); + sub_offset = 0; + } else +#endif + { + ppc_fctiwz (code, ppc_f0, sreg); + sub_offset = 4; + } + if (ppc_is_imm16 (offset + sub_offset)) { ppc_stfd (code, ppc_f0, offset, cfg->frame_reg); - ppc_lwz (code, dreg, offset + 4, cfg->frame_reg); + if (size == 8) + ppc_ldr (code, dreg, offset + sub_offset, cfg->frame_reg); + else + ppc_lwz (code, dreg, offset + sub_offset, cfg->frame_reg); } else { ppc_load (code, dreg, offset); ppc_add (code, dreg, dreg, cfg->frame_reg); ppc_stfd (code, ppc_f0, 0, dreg); - ppc_lwz (code, dreg, 4, dreg); + if (size == 8) + ppc_ldr (code, dreg, sub_offset, dreg); + else + ppc_lwz (code, dreg, sub_offset, dreg); } if (!is_signed) { if (size == 1) ppc_andid (code, dreg, dreg, 0xff); else if (size == 2) ppc_andid (code, dreg, dreg, 0xffff); +#ifdef __mono_ppc64__ + else if (size == 4) + ppc_clrldi (code, dreg, dreg, 32); +#endif } else { if (size == 1) ppc_extsb (code, dreg, dreg); else if (size == 2) ppc_extsh (code, dreg, dreg); +#ifdef __mono_ppc64__ + else if (size == 4) + ppc_extsw (code, dreg, dreg); +#endif } return code; } @@ -2288,6 +2732,9 @@ typedef struct { static int search_thunk_slot (void *data, int csize, int bsize, void *user_data) { +#ifdef __mono_ppc64__ + g_assert_not_reached (); +#else PatchData *pdata = (PatchData*)user_data; guchar *code = data; guint32 *thunks = data; @@ -2304,8 +2751,7 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) { return 0; templ = (guchar*)load; - ppc_lis (templ, ppc_r0, (guint32)(pdata->target) >> 16); - ppc_ori (templ, ppc_r0, ppc_r0, (guint32)(pdata->target) & 0xffff); + ppc_load_sequence (templ, ppc_r0, pdata->target); //g_print ("thunk nentries: %d\n", ((char*)endthunks - (char*)thunks)/16); if ((pdata->found == 2) || (pdata->code >= code && pdata->code <= code + csize)) { @@ -2313,7 +2759,6 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) { //g_print ("looking for target: %p at %p (%08x-%08x)\n", pdata->target, thunks, thunks [0], thunks [1]); if ((thunks [0] == load [0]) && (thunks [1] == load [1])) { ppc_patch (pdata->code, (guchar*)thunks); - mono_arch_flush_icache (pdata->code, 4); pdata->found = 1; /*{ static int num_thunks = 0; @@ -2332,7 +2777,6 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) { mono_arch_flush_icache ((guchar*)thunks, 16); ppc_patch (pdata->code, (guchar*)thunks); - mono_arch_flush_icache (pdata->code, 4); pdata->found = 1; /*{ static int num_thunks = 0; @@ -2348,6 +2792,7 @@ search_thunk_slot (void *data, int csize, int bsize, void *user_data) { } //g_print ("failed thunk lookup for %p from %p at %p (%d entries)\n", pdata->target, pdata->code, data, count); } +#endif return 0; } @@ -2362,12 +2807,12 @@ handle_thunk (int absolute, guchar *code, const guchar *target) { pdata.found = 0; mono_domain_lock (domain); - mono_code_manager_foreach (domain->code_mp, search_thunk_slot, &pdata); + mono_domain_code_foreach (domain, search_thunk_slot, &pdata); if (!pdata.found) { /* this uses the first available slot */ pdata.found = 2; - mono_code_manager_foreach (domain->code_mp, search_thunk_slot, &pdata); + mono_domain_code_foreach (domain, search_thunk_slot, &pdata); } mono_domain_unlock (domain); @@ -2379,14 +2824,14 @@ handle_thunk (int absolute, guchar *code, const guchar *target) { static void patch_ins (guint8 *code, guint32 ins) { - *(guint32*)code 
= ins; + *(guint32*)code = GUINT32_TO_BE (ins); mono_arch_flush_icache (code, 4); } void -ppc_patch (guchar *code, const guchar *target) +ppc_patch_full (guchar *code, const guchar *target, gboolean is_fd) { - guint32 ins = *(guint32*)code; + guint32 ins = GUINT32_FROM_BE (*(guint32*)code); guint32 prim = ins >> 26; guint32 ovf; @@ -2394,6 +2839,7 @@ ppc_patch (guchar *code, const guchar *target) if (prim == 18) { // prefer relative branches, they are more position independent (e.g. for AOT compilation). gint diff = target - code; + g_assert (!is_fd); if (diff >= 0){ if (diff <= 33554431){ ins = (18 << 26) | (diff) | (ins & 1); @@ -2431,6 +2877,7 @@ ppc_patch (guchar *code, const guchar *target) if (prim == 16) { + g_assert (!is_fd); // absolute address if (ins & 2) { guint32 li = (gulong)target; @@ -2455,6 +2902,46 @@ ppc_patch (guchar *code, const guchar *target) } if (prim == 15 || ins == 0x4e800021 || ins == 0x4e800020 || ins == 0x4e800420) { +#ifdef __mono_ppc64__ + guint32 *seq = (guint32*)code; + guint32 *branch_ins; + + /* the trampoline code will try to patch the blrl, blr, bcctr */ + if (ins == 0x4e800021 || ins == 0x4e800020 || ins == 0x4e800420) { + branch_ins = seq; + if (ppc_opcode (seq [-3]) == 58 || ppc_opcode (seq [-3]) == 31) /* ld || mr */ + code -= 32; + else + code -= 24; + } else { + if (ppc_opcode (seq [5]) == 58 || ppc_opcode (seq [5]) == 31) /* ld || mr */ + branch_ins = seq + 8; + else + branch_ins = seq + 6; + } + + seq = (guint32*)code; + /* this is the lis/ori/sldi/oris/ori/(ld/ld|mr/nop)/mtlr/blrl sequence */ + g_assert (mono_ppc_is_direct_call_sequence (branch_ins)); + + if (ppc_opcode (seq [5]) == 58) { /* ld */ + g_assert (ppc_opcode (seq [6]) == 58); /* ld */ + + if (!is_fd) { + guint8 *buf = (guint8*)&seq [5]; + ppc_mr (buf, ppc_r0, ppc_r11); + ppc_nop (buf); + } + } else { + if (is_fd) + target = mono_get_addr_from_ftnptr ((gpointer)target); + } + + /* FIXME: make this thread safe */ + /* FIXME: we're assuming we're using r11 here */ + ppc_load_ptr_sequence (code, ppc_r11, target); + mono_arch_flush_icache ((guint8*)seq, 28); +#else guint32 *seq; /* the trampoline code will try to patch the blrl, blr, bcctr */ if (ins == 0x4e800021 || ins == 0x4e800020 || ins == 0x4e800420) { @@ -2470,12 +2957,25 @@ ppc_patch (guchar *code, const guchar *target) ppc_lis (code, ppc_r0, (guint32)(target) >> 16); ppc_ori (code, ppc_r0, ppc_r0, (guint32)(target) & 0xffff); mono_arch_flush_icache (code - 8, 8); +#endif } else { g_assert_not_reached (); } // g_print ("patched with 0x%08x\n", ins); } +void +ppc_patch (guchar *code, const guchar *target) +{ + ppc_patch_full (code, target, FALSE); +} + +void +mono_ppc_patch (guchar *code, const guchar *target) +{ + ppc_patch (code, target); +} + static guint8* emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code) { @@ -2519,7 +3019,7 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code) ArgInfo *ainfo = &cinfo->ret; inst = cfg->vret_addr; g_assert (ppc_is_imm16 (inst->inst_offset)); - ppc_load_reg (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); + ppc_ldptr (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); } for (i = 0; i < sig->param_count + sig->hasthis; ++i) { ArgInfo *ainfo = cinfo->args + i; @@ -2537,8 +3037,13 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code) case 2: ppc_lhz (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); break; +#ifdef __mono_ppc64__ + case 4: + ppc_lwz (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); + break; +#endif 
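/*
 * Editor's note -- illustrative sketch, not part of the patch: the range test
 * behind ppc_patch_full's "prefer relative branches" path above.  An I-form
 * branch carries a 26-bit signed, 4-byte-aligned displacement, i.e. roughly
 * -33554432 .. +33554431 bytes from the branch itself.
 */
#include <stdint.h>
#include <stdio.h>

static int fits_in_rel_branch (intptr_t diff)
{
	return diff >= -33554432 && diff <= 33554431 && (diff & 3) == 0;
}

int main (void)
{
	printf ("%d %d %d\n",
	        fits_in_rel_branch (32),               /* 1: nearby, aligned   */
	        fits_in_rel_branch (50L * 1024 * 1024),/* 0: beyond +-32 MB    */
	        fits_in_rel_branch (34));              /* 0: not 4-byte aligned */
	return 0;
}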
default: - ppc_load_reg (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); + ppc_ldptr (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); break; } break; @@ -2560,16 +3065,28 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code) MonoType *type = mini_type_get_underlying_type (cfg->generic_sharing_context, &inst->klass->byval_arg); - if (!MONO_TYPE_IS_REFERENCE (type) && type->type != MONO_TYPE_I4) +#ifndef __mono_ppc64__ + if (type->type == MONO_TYPE_I8) NOT_IMPLEMENTED; +#endif + + if (MONO_TYPE_IS_REFERENCE (type) || type->type == MONO_TYPE_I8) { + ppc_ldptr (code, ppc_r0, inst->inst_offset, inst->inst_basereg); + ppc_stptr (code, ppc_r0, ainfo->offset, ainfo->reg); + } else if (type->type == MONO_TYPE_I4) { + ppc_lwz (code, ppc_r0, inst->inst_offset, inst->inst_basereg); + ppc_stw (code, ppc_r0, ainfo->offset, ainfo->reg); + } else { + NOT_IMPLEMENTED; + } - ppc_lwz (code, ppc_r0, inst->inst_offset, inst->inst_basereg); - ppc_stw (code, ppc_r0, ainfo->offset, ainfo->reg); break; } case RegTypeStructByVal: { +#ifdef __APPLE__ guint32 size = 0; +#endif int j; /* FIXME: */ @@ -2587,10 +3104,13 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code) NOT_IMPLEMENTED; } else #endif - for (j = 0; j < ainfo->size; ++j) { - ppc_load_reg (code, ainfo->reg + j, + for (j = 0; j < ainfo->vtregs; ++j) { + ppc_ldptr (code, ainfo->reg + j, inst->inst_offset + j * sizeof (gpointer), inst->inst_basereg); + /* FIXME: shift to the right */ + if (ainfo->bytes) + NOT_IMPLEMENTED; } break; } @@ -2600,7 +3120,7 @@ emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code) g_assert (ppc_is_imm16 (addr->inst_offset)); g_assert (!ainfo->offset); - ppc_load_reg (code, ainfo->reg, addr->inst_offset, addr->inst_basereg); + ppc_ldptr (code, ainfo->reg, addr->inst_offset, addr->inst_basereg); struct_index++; break; @@ -2671,7 +3191,7 @@ ins_native_length (MonoCompile *cfg, MonoInst *ins) static guint8* emit_reserve_param_area (MonoCompile *cfg, guint8 *code) { - int size = cfg->param_area; + long size = cfg->param_area; size += MONO_ARCH_FRAME_ALIGNMENT - 1; size &= -MONO_ARCH_FRAME_ALIGNMENT; @@ -2679,12 +3199,12 @@ emit_reserve_param_area (MonoCompile *cfg, guint8 *code) if (!size) return code; - ppc_load_reg (code, ppc_r0, 0, ppc_sp); + ppc_ldptr (code, ppc_r0, 0, ppc_sp); if (ppc_is_imm16 (-size)) { - ppc_store_reg_update (code, ppc_r0, -size, ppc_sp); + ppc_stptr_update (code, ppc_r0, -size, ppc_sp); } else { ppc_load (code, ppc_r11, -size); - ppc_store_reg_update_indexed (code, ppc_r0, ppc_sp, ppc_r11); + ppc_stptr_update_indexed (code, ppc_r0, ppc_sp, ppc_r11); } return code; @@ -2693,7 +3213,7 @@ emit_reserve_param_area (MonoCompile *cfg, guint8 *code) static guint8* emit_unreserve_param_area (MonoCompile *cfg, guint8 *code) { - int size = cfg->param_area; + long size = cfg->param_area; size += MONO_ARCH_FRAME_ALIGNMENT - 1; size &= -MONO_ARCH_FRAME_ALIGNMENT; @@ -2701,17 +3221,19 @@ emit_unreserve_param_area (MonoCompile *cfg, guint8 *code) if (!size) return code; - ppc_load_reg (code, ppc_r0, 0, ppc_sp); + ppc_ldptr (code, ppc_r0, 0, ppc_sp); if (ppc_is_imm16 (size)) { - ppc_store_reg_update (code, ppc_r0, size, ppc_sp); + ppc_stptr_update (code, ppc_r0, size, ppc_sp); } else { ppc_load (code, ppc_r11, size); - ppc_store_reg_update_indexed (code, ppc_r0, ppc_sp, ppc_r11); + ppc_stptr_update_indexed (code, ppc_r0, ppc_sp, ppc_r11); } return code; } +#define MASK_SHIFT_IMM(i) ((i) & MONO_PPC_32_64_CASE (0x1f, 0x3f)) + void mono_arch_output_basic_block (MonoCompile *cfg, 
MonoBasicBlock *bb) { @@ -2722,6 +3244,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) MonoInst *last_ins = NULL; guint last_offset = 0; int max_len, cpos; + int L; /* we don't align basic blocks of loops on ppc */ @@ -2755,7 +3278,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) // g_print ("cil code\n"); mono_debug_record_line_number (cfg, ins, offset); - switch (ins->opcode) { + switch (normalize_opcode (ins->opcode)) { case OP_RELAXED_NOP: case OP_NOP: case OP_DUMMY_USE: @@ -2763,6 +3286,33 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_NOT_REACHED: case OP_NOT_NULL: break; + case OP_SEQ_POINT: { + int i; + + if (cfg->compile_aot) + NOT_IMPLEMENTED; + + /* + * Read from the single stepping trigger page. This will cause a + * SIGSEGV when single stepping is enabled. + * We do this _before_ the breakpoint, so single stepping after + * a breakpoint is hit will step to the next IL offset. + */ + if (ins->flags & MONO_INST_SINGLE_STEP_LOC) { + ppc_load (code, ppc_r11, (gsize)ss_trigger_page); + ppc_ldptr (code, ppc_r11, 0, ppc_r11); + } + + mono_add_seq_point (cfg, bb, ins, code - cfg->native_code); + + /* + * A placeholder for a possible breakpoint inserted by + * mono_arch_set_breakpoint (). + */ + for (i = 0; i < BREAKPOINT_SIZE / 4; ++i) + ppc_nop (code); + break; + } case OP_TLS_GET: emit_tls_access (code, ins->dreg, ins->inst_offset); break; @@ -2783,48 +3333,102 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) if (ppc_is_imm16 (ins->inst_offset)) { ppc_stb (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg); } else { - ppc_load (code, ppc_r0, ins->inst_offset); - ppc_stbx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + if (ppc_is_imm32 (ins->inst_offset)) { + ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset)); + ppc_stb (code, ins->sreg1, ins->inst_offset, ppc_r12); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_stbx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + } } break; case OP_STOREI2_MEMBASE_REG: if (ppc_is_imm16 (ins->inst_offset)) { ppc_sth (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg); } else { - ppc_load (code, ppc_r0, ins->inst_offset); - ppc_sthx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + if (ppc_is_imm32 (ins->inst_offset)) { + ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset)); + ppc_sth (code, ins->sreg1, ins->inst_offset, ppc_r12); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_sthx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + } } break; case OP_STORE_MEMBASE_REG: - case OP_STOREI4_MEMBASE_REG: if (ppc_is_imm16 (ins->inst_offset)) { - ppc_stw (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg); + ppc_stptr (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg); + } else { + if (ppc_is_imm32 (ins->inst_offset)) { + ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset)); + ppc_stptr (code, ins->sreg1, ins->inst_offset, ppc_r12); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_stptr_indexed (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + } + } + break; +#ifdef __mono_ilp32__ + case OP_STOREI8_MEMBASE_REG: + if (ppc_is_imm16 (ins->inst_offset)) { + ppc_str (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg); } else { ppc_load (code, ppc_r0, ins->inst_offset); - ppc_stwx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + ppc_str_indexed (code, ins->sreg1, ins->inst_destbasereg, 
ppc_r0); } break; +#endif case OP_STOREI1_MEMINDEX: - ppc_stbx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg); + ppc_stbx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2); break; case OP_STOREI2_MEMINDEX: - ppc_sthx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg); + ppc_sthx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2); break; case OP_STORE_MEMINDEX: - case OP_STOREI4_MEMINDEX: - ppc_stwx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg); + ppc_stptr_indexed (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2); break; case OP_LOADU4_MEM: g_assert_not_reached (); break; case OP_LOAD_MEMBASE: + if (ppc_is_imm16 (ins->inst_offset)) { + ppc_ldptr (code, ins->dreg, ins->inst_offset, ins->inst_basereg); + } else { + if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) { + ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset)); + ppc_ldptr (code, ins->dreg, ins->inst_offset, ins->dreg); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_ldptr_indexed (code, ins->dreg, ins->inst_basereg, ppc_r0); + } + } + break; case OP_LOADI4_MEMBASE: +#ifdef __mono_ppc64__ + if (ppc_is_imm16 (ins->inst_offset)) { + ppc_lwa (code, ins->dreg, ins->inst_offset, ins->inst_basereg); + } else { + if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) { + ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset)); + ppc_lwa (code, ins->dreg, ins->inst_offset, ins->dreg); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_lwax (code, ins->dreg, ins->inst_basereg, ppc_r0); + } + } + break; +#endif case OP_LOADU4_MEMBASE: if (ppc_is_imm16 (ins->inst_offset)) { ppc_lwz (code, ins->dreg, ins->inst_offset, ins->inst_basereg); } else { - ppc_load (code, ppc_r0, ins->inst_offset); - ppc_lwzx (code, ins->dreg, ins->inst_basereg, ppc_r0); + if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) { + ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset)); + ppc_lwz (code, ins->dreg, ins->inst_offset, ins->dreg); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_lwzx (code, ins->dreg, ins->inst_basereg, ppc_r0); + } } break; case OP_LOADI1_MEMBASE: @@ -2832,8 +3436,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) if (ppc_is_imm16 (ins->inst_offset)) { ppc_lbz (code, ins->dreg, ins->inst_offset, ins->inst_basereg); } else { - ppc_load (code, ppc_r0, ins->inst_offset); - ppc_lbzx (code, ins->dreg, ins->inst_basereg, ppc_r0); + if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) { + ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset)); + ppc_lbz (code, ins->dreg, ins->inst_offset, ins->dreg); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_lbzx (code, ins->dreg, ins->inst_basereg, ppc_r0); + } } if (ins->opcode == OP_LOADI1_MEMBASE) ppc_extsb (code, ins->dreg, ins->dreg); @@ -2842,68 +3451,102 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) if (ppc_is_imm16 (ins->inst_offset)) { ppc_lhz (code, ins->dreg, ins->inst_offset, ins->inst_basereg); } else { - ppc_load (code, ppc_r0, ins->inst_offset); - ppc_lhzx (code, ins->dreg, ins->inst_basereg, ppc_r0); + if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) { + ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset)); + ppc_lhz (code, ins->dreg, ins->inst_offset, ins->dreg); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_lhzx (code, ins->dreg, ins->inst_basereg, ppc_r0); + } } break; case OP_LOADI2_MEMBASE: if (ppc_is_imm16 (ins->inst_offset)) { - 
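/*
 * Aside on the ppc_is_imm32 ()/ppc_ha () paths added above: when a membase
 * offset does not fit the signed 16-bit displacement field, the emitter now
 * splits it into an addis carrying the "high adjusted" half followed by the
 * original load/store carrying the low 16 bits.  ppc_ha () is defined in the
 * ppc headers, not in this diff; the sketch below shows the computation it is
 * assumed to perform and why the rounding is needed: the low halfword is
 * sign-extended by the hardware, so the high half must be bumped by one
 * whenever bit 15 of the offset is set.
 */
#include <stdint.h>
#include <assert.h>

static int32_t
sketch_ha (int32_t off)
{
	return (off >> 16) + ((off & 0x8000) ? 1 : 0);
}

static void
sketch_ha_check (void)
{
	/* worked example: 0x12348000 -> addis immediate 0x1235, displacement -0x8000 */
	int32_t off = 0x12348000;
	int64_t hi = sketch_ha (off);
	int64_t lo = (int16_t)(off & 0xffff);	/* sign-extended, as the CPU does */
	assert (hi * 65536 + lo == off);
}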
ppc_lha (code, ins->dreg, ins->inst_basereg, ins->inst_offset); + ppc_lha (code, ins->dreg, ins->inst_offset, ins->inst_basereg); + } else { + if (ppc_is_imm32 (ins->inst_offset) && (ins->dreg > 0)) { + ppc_addis (code, ins->dreg, ins->inst_basereg, ppc_ha(ins->inst_offset)); + ppc_lha (code, ins->dreg, ins->inst_offset, ins->dreg); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_lhax (code, ins->dreg, ins->inst_basereg, ppc_r0); + } + } + break; +#ifdef __mono_ilp32__ + case OP_LOADI8_MEMBASE: + if (ppc_is_imm16 (ins->inst_offset)) { + ppc_ldr (code, ins->dreg, ins->inst_offset, ins->inst_basereg); } else { ppc_load (code, ppc_r0, ins->inst_offset); - ppc_lhax (code, ins->dreg, ins->inst_basereg, ppc_r0); + ppc_ldr_indexed (code, ins->dreg, ins->inst_basereg, ppc_r0); } break; +#endif case OP_LOAD_MEMINDEX: + ppc_ldptr_indexed (code, ins->dreg, ins->inst_basereg, ins->sreg2); + break; case OP_LOADI4_MEMINDEX: +#ifdef __mono_ppc64__ + ppc_lwax (code, ins->dreg, ins->inst_basereg, ins->sreg2); + break; +#endif case OP_LOADU4_MEMINDEX: - ppc_lwzx (code, ins->dreg, ins->sreg2, ins->inst_basereg); + ppc_lwzx (code, ins->dreg, ins->inst_basereg, ins->sreg2); break; case OP_LOADU2_MEMINDEX: - ppc_lhzx (code, ins->dreg, ins->sreg2, ins->inst_basereg); + ppc_lhzx (code, ins->dreg, ins->inst_basereg, ins->sreg2); break; case OP_LOADI2_MEMINDEX: - ppc_lhax (code, ins->dreg, ins->sreg2, ins->inst_basereg); + ppc_lhax (code, ins->dreg, ins->inst_basereg, ins->sreg2); break; case OP_LOADU1_MEMINDEX: - ppc_lbzx (code, ins->dreg, ins->sreg2, ins->inst_basereg); + ppc_lbzx (code, ins->dreg, ins->inst_basereg, ins->sreg2); break; case OP_LOADI1_MEMINDEX: - ppc_lbzx (code, ins->dreg, ins->sreg2, ins->inst_basereg); + ppc_lbzx (code, ins->dreg, ins->inst_basereg, ins->sreg2); ppc_extsb (code, ins->dreg, ins->dreg); break; case OP_ICONV_TO_I1: + CASE_PPC64 (OP_LCONV_TO_I1) ppc_extsb (code, ins->dreg, ins->sreg1); break; case OP_ICONV_TO_I2: + CASE_PPC64 (OP_LCONV_TO_I2) ppc_extsh (code, ins->dreg, ins->sreg1); break; case OP_ICONV_TO_U1: + CASE_PPC64 (OP_LCONV_TO_U1) ppc_clrlwi (code, ins->dreg, ins->sreg1, 24); break; case OP_ICONV_TO_U2: + CASE_PPC64 (OP_LCONV_TO_U2) ppc_clrlwi (code, ins->dreg, ins->sreg1, 16); break; case OP_COMPARE: case OP_ICOMPARE: + CASE_PPC64 (OP_LCOMPARE) + L = (sizeof (mgreg_t) == 4 || ins->opcode == OP_ICOMPARE) ? 0 : 1; next = ins->next; if (next && compare_opcode_is_unsigned (next->opcode)) - ppc_cmpl (code, 0, 0, ins->sreg1, ins->sreg2); + ppc_cmpl (code, 0, L, ins->sreg1, ins->sreg2); else - ppc_cmp (code, 0, 0, ins->sreg1, ins->sreg2); + ppc_cmp (code, 0, L, ins->sreg1, ins->sreg2); break; case OP_COMPARE_IMM: case OP_ICOMPARE_IMM: + CASE_PPC64 (OP_LCOMPARE_IMM) + L = (sizeof (mgreg_t) == 4 || ins->opcode == OP_ICOMPARE_IMM) ? 
0 : 1; next = ins->next; if (next && compare_opcode_is_unsigned (next->opcode)) { if (ppc_is_uimm16 (ins->inst_imm)) { - ppc_cmpli (code, 0, 0, ins->sreg1, (ins->inst_imm & 0xffff)); + ppc_cmpli (code, 0, L, ins->sreg1, (ins->inst_imm & 0xffff)); } else { g_assert_not_reached (); } } else { if (ppc_is_imm16 (ins->inst_imm)) { - ppc_cmpi (code, 0, 0, ins->sreg1, (ins->inst_imm & 0xffff)); + ppc_cmpi (code, 0, L, ins->sreg1, (ins->inst_imm & 0xffff)); } else { g_assert_not_reached (); } @@ -2917,6 +3560,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ppc_addco (code, ins->dreg, ins->sreg1, ins->sreg2); break; case OP_IADD: + CASE_PPC64 (OP_LADD) ppc_add (code, ins->dreg, ins->sreg1, ins->sreg2); break; case OP_ADC: @@ -2932,6 +3576,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_ADD_IMM: case OP_IADD_IMM: + CASE_PPC64 (OP_LADD_IMM) if (ppc_is_imm16 (ins->inst_imm)) { ppc_addi (code, ins->dreg, ins->sreg1, ins->inst_imm); } else { @@ -2955,6 +3600,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException"); break; case OP_ISUB_OVF: + CASE_PPC64 (OP_LSUB_OVF) /* check XER [0-3] (SO, OV, CA): we can't use mcrxr */ ppc_subfo (code, ins->dreg, ins->sreg2, ins->sreg1); @@ -2963,6 +3609,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException"); break; case OP_ISUB_OVF_UN: + CASE_PPC64 (OP_LSUB_OVF_UN) /* check XER [0-3] (SO, OV, CA): we can't use mcrxr */ ppc_subfc (code, ins->dreg, ins->sreg2, ins->sreg1); @@ -3007,6 +3654,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ppc_subfco (code, ins->dreg, ins->sreg2, ins->sreg1); break; case OP_ISUB: + CASE_PPC64 (OP_LSUB) ppc_subf (code, ins->dreg, ins->sreg2, ins->sreg1); break; case OP_SBB: @@ -3015,6 +3663,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_SUB_IMM: case OP_ISUB_IMM: + CASE_PPC64 (OP_LSUB_IMM) // we add the negated value if (ppc_is_imm16 (-ins->inst_imm)) ppc_addi (code, ins->dreg, ins->sreg1, -ins->inst_imm); @@ -3030,11 +3679,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ppc_subfze (code, ins->dreg, ins->sreg1); break; case OP_IAND: + CASE_PPC64 (OP_LAND) /* FIXME: the ppc macros as inconsistent here: put dest as the first arg! 
*/ ppc_and (code, ins->sreg1, ins->dreg, ins->sreg2); break; case OP_AND_IMM: case OP_IAND_IMM: + CASE_PPC64 (OP_LAND_IMM) if (!(ins->inst_imm & 0xffff0000)) { ppc_andid (code, ins->sreg1, ins->dreg, ins->inst_imm); } else if (!(ins->inst_imm & 0xffff)) { @@ -3043,27 +3694,43 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) g_assert_not_reached (); } break; - case OP_IDIV: { + case OP_IDIV: + CASE_PPC64 (OP_LDIV) { guint8 *divisor_is_m1; /* XER format: SO, OV, CA, reserved [21 bits], count [8 bits] */ - ppc_cmpi (code, 0, 0, ins->sreg2, -1); + ppc_compare_reg_imm (code, 0, ins->sreg2, -1); divisor_is_m1 = code; ppc_bc (code, PPC_BR_FALSE | PPC_BR_LIKELY, PPC_BR_EQ, 0); ppc_lis (code, ppc_r0, 0x8000); - ppc_cmp (code, 0, 0, ins->sreg1, ppc_r0); +#ifdef __mono_ppc64__ + if (ins->opcode == OP_LDIV) + ppc_sldi (code, ppc_r0, ppc_r0, 32); +#endif + ppc_compare (code, 0, ins->sreg1, ppc_r0); EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_TRUE, PPC_BR_EQ, "ArithmeticException"); ppc_patch (divisor_is_m1, code); /* XER format: SO, OV, CA, reserved [21 bits], count [8 bits] */ - ppc_divwod (code, ins->dreg, ins->sreg1, ins->sreg2); + if (ins->opcode == OP_IDIV) + ppc_divwod (code, ins->dreg, ins->sreg1, ins->sreg2); +#ifdef __mono_ppc64__ + else + ppc_divdod (code, ins->dreg, ins->sreg1, ins->sreg2); +#endif ppc_mfspr (code, ppc_r0, ppc_xer); ppc_andisd (code, ppc_r0, ppc_r0, (1<<14)); EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException"); break; } case OP_IDIV_UN: - ppc_divwuod (code, ins->dreg, ins->sreg1, ins->sreg2); + CASE_PPC64 (OP_LDIV_UN) + if (ins->opcode == OP_IDIV_UN) + ppc_divwuod (code, ins->dreg, ins->sreg1, ins->sreg2); +#ifdef __mono_ppc64__ + else + ppc_divduod (code, ins->dreg, ins->sreg1, ins->sreg2); +#endif ppc_mfspr (code, ppc_r0, ppc_xer); ppc_andisd (code, ppc_r0, ppc_r0, (1<<14)); EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "DivideByZeroException"); @@ -3074,10 +3741,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_REM_IMM: g_assert_not_reached (); case OP_IOR: + CASE_PPC64 (OP_LOR) ppc_or (code, ins->dreg, ins->sreg1, ins->sreg2); break; case OP_OR_IMM: case OP_IOR_IMM: + CASE_PPC64 (OP_LOR_IMM) if (!(ins->inst_imm & 0xffff0000)) { ppc_ori (code, ins->sreg1, ins->dreg, ins->inst_imm); } else if (!(ins->inst_imm & 0xffff)) { @@ -3087,10 +3756,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; case OP_IXOR: + CASE_PPC64 (OP_LXOR) ppc_xor (code, ins->dreg, ins->sreg1, ins->sreg2); break; case OP_IXOR_IMM: case OP_XOR_IMM: + CASE_PPC64 (OP_LXOR_IMM) if (!(ins->inst_imm & 0xffff0000)) { ppc_xori (code, ins->sreg1, ins->dreg, ins->inst_imm); } else if (!(ins->inst_imm & 0xffff)) { @@ -3100,23 +3771,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; case OP_ISHL: - ppc_slw (code, ins->sreg1, ins->dreg, ins->sreg2); + CASE_PPC64 (OP_LSHL) + ppc_shift_left (code, ins->dreg, ins->sreg1, ins->sreg2); break; case OP_SHL_IMM: case OP_ISHL_IMM: - ppc_rlwinm (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f), 0, (31 - (ins->inst_imm & 0x1f))); + CASE_PPC64 (OP_LSHL_IMM) + ppc_shift_left_imm (code, ins->dreg, ins->sreg1, MASK_SHIFT_IMM (ins->inst_imm)); break; case OP_ISHR: ppc_sraw (code, ins->dreg, ins->sreg1, ins->sreg2); break; case OP_SHR_IMM: - case OP_ISHR_IMM: - ppc_srawi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f)); + ppc_shift_right_arith_imm (code, ins->dreg, ins->sreg1, MASK_SHIFT_IMM (ins->inst_imm)); break; 
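/*
 * Aside on the overflow checks used by OP_IDIV/OP_IMUL_OVF above and by
 * OP_COND_EXC_C/OP_COND_EXC_OV later in this function: since mcrxr is not
 * implemented on some processors, the code reads XER with mfspr and tests
 * the flag with andis., whose 16-bit immediate the CPU shifts left by 16.
 * The small self-contained check below confirms that the immediates used in
 * this diff land on the architected OV and CA bits of XER (SO is 0x80000000,
 * OV 0x40000000, CA 0x20000000).
 */
#include <assert.h>
#include <stdint.h>

static void
sketch_xer_masks (void)
{
	const uint32_t XER_OV = 1u << 30;
	const uint32_t XER_CA = 1u << 29;

	assert (((uint32_t)(1 << 14) << 16) == XER_OV);	/* andis. rD,rS,(1<<14) isolates OV */
	assert (((uint32_t)(1 << 13) << 16) == XER_CA);	/* andis. rD,rS,(1<<13) isolates CA */
}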
case OP_SHR_UN_IMM: - case OP_ISHR_UN_IMM: - if (ins->inst_imm & 0x1f) - ppc_srwi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f)); + if (MASK_SHIFT_IMM (ins->inst_imm)) + ppc_shift_right_imm (code, ins->dreg, ins->sreg1, MASK_SHIFT_IMM (ins->inst_imm)); else ppc_mr (code, ins->dreg, ins->sreg1); break; @@ -3124,16 +3795,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ppc_srw (code, ins->dreg, ins->sreg1, ins->sreg2); break; case OP_INOT: + CASE_PPC64 (OP_LNOT) ppc_not (code, ins->dreg, ins->sreg1); break; case OP_INEG: + CASE_PPC64 (OP_LNEG) ppc_neg (code, ins->dreg, ins->sreg1); break; case OP_IMUL: - ppc_mullw (code, ins->dreg, ins->sreg1, ins->sreg2); + CASE_PPC64 (OP_LMUL) + ppc_multiply (code, ins->dreg, ins->sreg1, ins->sreg2); break; case OP_IMUL_IMM: case OP_MUL_IMM: + CASE_PPC64 (OP_LMUL_IMM) if (ppc_is_imm16 (ins->inst_imm)) { ppc_mulli (code, ins->dreg, ins->sreg1, ins->inst_imm); } else { @@ -3141,33 +3816,62 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; case OP_IMUL_OVF: + CASE_PPC64 (OP_LMUL_OVF) /* we annot use mcrxr, since it's not implemented on some processors * XER format: SO, OV, CA, reserved [21 bits], count [8 bits] */ - ppc_mullwo (code, ins->dreg, ins->sreg1, ins->sreg2); + if (ins->opcode == OP_IMUL_OVF) + ppc_mullwo (code, ins->dreg, ins->sreg1, ins->sreg2); +#ifdef __mono_ppc64__ + else + ppc_mulldo (code, ins->dreg, ins->sreg1, ins->sreg2); +#endif ppc_mfspr (code, ppc_r0, ppc_xer); ppc_andisd (code, ppc_r0, ppc_r0, (1<<14)); EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException"); break; case OP_IMUL_OVF_UN: + CASE_PPC64 (OP_LMUL_OVF_UN) /* we first multiply to get the high word and compare to 0 * to set the flags, then the result is discarded and then * we multiply to get the lower * bits result */ - ppc_mulhwu (code, ppc_r0, ins->sreg1, ins->sreg2); + if (ins->opcode == OP_IMUL_OVF_UN) + ppc_mulhwu (code, ppc_r0, ins->sreg1, ins->sreg2); +#ifdef __mono_ppc64__ + else + ppc_mulhdu (code, ppc_r0, ins->sreg1, ins->sreg2); +#endif ppc_cmpi (code, 0, 0, ppc_r0, 0); EMIT_COND_SYSTEM_EXCEPTION (CEE_BNE_UN - CEE_BEQ, "OverflowException"); - ppc_mullw (code, ins->dreg, ins->sreg1, ins->sreg2); + ppc_multiply (code, ins->dreg, ins->sreg1, ins->sreg2); break; case OP_ICONST: ppc_load (code, ins->dreg, ins->inst_c0); break; + case OP_I8CONST: { + ppc_load (code, ins->dreg, ins->inst_l); + break; + } + case OP_LOAD_GOTADDR: + /* The PLT implementation depends on this */ + g_assert (ins->dreg == ppc_r30); + + code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL); + break; + case OP_GOT_ENTRY: + // FIXME: Fix max instruction length + mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0); + /* arch_emit_got_access () patches this */ + ppc_load32 (code, ppc_r0, 0); + ppc_ldptr_indexed (code, ins->dreg, ins->inst_basereg, ppc_r0); + break; case OP_AOTCONST: mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0); ppc_load_sequence (code, ins->dreg, 0); break; - case OP_ICONV_TO_I4: - case OP_ICONV_TO_U4: + CASE_PPC32 (OP_ICONV_TO_I4) + CASE_PPC32 (OP_ICONV_TO_U4) case OP_MOVE: ppc_mr (code, ins->dreg, ins->sreg1); break; @@ -3190,7 +3894,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ppc_frsp (code, ins->dreg, ins->sreg1); break; case OP_JMP: { - int i, pos = 0; + int i, pos; /* * Keep in sync with mono_arch_emit_epilog @@ -3201,11 +3905,12 @@ mono_arch_output_basic_block 
(MonoCompile *cfg, MonoBasicBlock *bb) * we're leaving the method. */ if (1 || cfg->flags & MONO_CFG_HAS_CALLS) { - if (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)) { - ppc_load_reg (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg); + long ret_offset = cfg->stack_usage + PPC_RET_ADDR_OFFSET; + if (ppc_is_imm16 (ret_offset)) { + ppc_ldptr (code, ppc_r0, ret_offset, cfg->frame_reg); } else { - ppc_load (code, ppc_r11, cfg->stack_usage + PPC_RET_ADDR_OFFSET); - ppc_load_reg_indexed (code, ppc_r0, cfg->frame_reg, ppc_r11); + ppc_load (code, ppc_r11, ret_offset); + ppc_ldptr_indexed (code, ppc_r0, cfg->frame_reg, ppc_r11); } ppc_mtlr (code, ppc_r0); } @@ -3213,10 +3918,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) code = emit_load_volatile_arguments (cfg, code); if (ppc_is_imm16 (cfg->stack_usage)) { - ppc_addic (code, ppc_sp, cfg->frame_reg, cfg->stack_usage); + ppc_addi (code, ppc_r11, cfg->frame_reg, cfg->stack_usage); } else { - ppc_load (code, ppc_r11, cfg->stack_usage); - ppc_add (code, ppc_sp, cfg->frame_reg, ppc_r11); + /* cfg->stack_usage is an int, so we can use + * an addis/addi sequence here even in 64-bit. */ + ppc_addis (code, ppc_r11, cfg->frame_reg, ppc_ha(cfg->stack_usage)); + ppc_addi (code, ppc_r11, ppc_r11, cfg->stack_usage); } if (!cfg->method->save_lmf) { /*for (i = 31; i >= 14; --i) { @@ -3225,32 +3932,47 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ppc_lfd (code, i, -pos, cfg->frame_reg); } }*/ - /* FIXME: restore registers before changing ppc_sp */ + pos = 0; for (i = 31; i >= 13; --i) { if (cfg->used_int_regs & (1 << i)) { - pos += sizeof (gulong); - ppc_load_reg_indexed (code, i, -pos, ppc_sp); + pos += sizeof (gpointer); + ppc_ldptr (code, i, -pos, ppc_r11); } } } else { /* FIXME restore from MonoLMF: though this can't happen yet */ } + ppc_mr (code, ppc_sp, ppc_r11); mono_add_patch_info (cfg, (guint8*) code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0); - ppc_b (code, 0); + if (cfg->compile_aot) { + /* arch_emit_got_access () patches this */ + ppc_load32 (code, ppc_r0, 0); +#ifdef PPC_USES_FUNCTION_DESCRIPTOR + ppc_ldptr_indexed (code, ppc_r11, ppc_r30, ppc_r0); + ppc_ldptr (code, ppc_r0, 0, ppc_r11); +#else + ppc_ldptr_indexed (code, ppc_r0, ppc_r30, ppc_r0); +#endif + ppc_mtctr (code, ppc_r0); + ppc_bcctr (code, PPC_BR_ALWAYS, 0); + } else { + ppc_b (code, 0); + } break; } case OP_CHECK_THIS: /* ensure ins->sreg1 is not NULL */ - ppc_load_reg (code, ppc_r0, 0, ins->sreg1); + ppc_ldptr (code, ppc_r0, 0, ins->sreg1); break; case OP_ARGLIST: { - if (ppc_is_imm16 (cfg->sig_cookie + cfg->stack_usage)) { - ppc_addi (code, ppc_r0, cfg->frame_reg, cfg->sig_cookie + cfg->stack_usage); + long cookie_offset = cfg->sig_cookie + cfg->stack_usage; + if (ppc_is_imm16 (cookie_offset)) { + ppc_addi (code, ppc_r0, cfg->frame_reg, cookie_offset); } else { - ppc_load (code, ppc_r0, cfg->sig_cookie + cfg->stack_usage); + ppc_load (code, ppc_r0, cookie_offset); ppc_add (code, ppc_r0, cfg->frame_reg, ppc_r0); } - ppc_store_reg (code, ppc_r0, 0, ins->sreg1); + ppc_stptr (code, ppc_r0, 0, ins->sreg1); break; } case OP_FCALL: @@ -3264,7 +3986,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD, call->method); else mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_ABS, call->fptr); - if (FORCE_INDIR_CALL || cfg->method->dynamic) { + if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) { 
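/*
 * Note on the branch taken just above (FORCE_INDIR_CALL || dynamic, and not
 * AOT): the call target is materialized into r0 with a patchable load
 * sequence, moved to LR and invoked with blrl, instead of emitting a bl that
 * ppc_patch () would later redirect.  The likely reason (an assumption, the
 * surrounding code does not state it) is that dynamically generated code can
 * sit outside the +/-32 MB reach of a relative bl and can be freed and
 * reallocated, so an always-indirect call sidesteps reachability problems.
 * ppc_load_func is presumably the function-descriptor-aware variant of
 * ppc_load; its definition lives in the ppc headers, not in this diff.
 */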
ppc_load_func (code, ppc_r0, 0); ppc_mtlr (code, ppc_r0); ppc_blrl (code); @@ -3280,7 +4002,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_VCALL2_REG: case OP_VOIDCALL_REG: case OP_CALL_REG: +#ifdef PPC_USES_FUNCTION_DESCRIPTOR + ppc_ldptr (code, ppc_r0, 0, ins->sreg1); + /* FIXME: if we know that this is a method, we + can omit this load */ + ppc_ldptr (code, ppc_r2, 8, ins->sreg1); + ppc_mtlr (code, ppc_r0); +#else ppc_mtlr (code, ins->sreg1); +#endif ppc_blrl (code); /* FIXME: this should be handled somewhere else in the new jit */ code = emit_move_return_value (cfg, ins, code); @@ -3291,7 +4021,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_VCALL2_MEMBASE: case OP_VOIDCALL_MEMBASE: case OP_CALL_MEMBASE: - ppc_load_reg (code, ppc_r0, ins->inst_offset, ins->sreg1); + if (cfg->compile_aot && ins->sreg1 == ppc_r11) { + /* The trampolines clobber this */ + ppc_mr (code, ppc_r29, ins->sreg1); + ppc_ldptr (code, ppc_r0, ins->inst_offset, ppc_r29); + } else { + ppc_ldptr (code, ppc_r0, ins->inst_offset, ins->sreg1); + } ppc_mtlr (code, ppc_r0); ppc_blrl (code); /* FIXME: this should be handled somewhere else in the new jit */ @@ -3304,7 +4040,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) int area_offset = alloca_waste; area_offset &= ~31; ppc_addi (code, ppc_r11, ins->sreg1, alloca_waste + 31); - ppc_rlwinm (code, ppc_r11, ppc_r11, 0, 0, 27); + /* FIXME: should be calculated from MONO_ARCH_FRAME_ALIGNMENT */ + ppc_clear_right_imm (code, ppc_r11, ppc_r11, 4); /* use ctr to store the number of words to 0 if needed */ if (ins->flags & MONO_INST_INIT) { /* we zero 4 bytes at a time: @@ -3313,13 +4050,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) * it negative and iterate billions of times. 
*/ ppc_addi (code, ppc_r0, ins->sreg1, 7); - ppc_srawi (code, ppc_r0, ppc_r0, 2); + ppc_shift_right_arith_imm (code, ppc_r0, ppc_r0, 2); ppc_mtctr (code, ppc_r0); } - ppc_load_reg (code, ppc_r0, 0, ppc_sp); + ppc_ldptr (code, ppc_r0, 0, ppc_sp); ppc_neg (code, ppc_r11, ppc_r11); - ppc_store_reg_update_indexed (code, ppc_r0, ppc_sp, ppc_r11); + ppc_stptr_update_indexed (code, ppc_r0, ppc_sp, ppc_r11); + /* FIXME: make this loop work in 8 byte + increments on PPC64 */ if (ins->flags & MONO_INST_INIT) { /* adjust the dest reg by -4 so we can use stwu */ /* we actually adjust -8 because we let the loop @@ -3341,7 +4080,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ppc_mr (code, ppc_r3, ins->sreg1); mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_arch_throw_exception"); - if (FORCE_INDIR_CALL || cfg->method->dynamic) { + if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) { ppc_load_func (code, ppc_r0, 0); ppc_mtlr (code, ppc_r0); ppc_blrl (code); @@ -3355,7 +4094,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ppc_mr (code, ppc_r3, ins->sreg1); mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_arch_rethrow_exception"); - if (FORCE_INDIR_CALL || cfg->method->dynamic) { + if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) { ppc_load_func (code, ppc_r0, 0); ppc_mtlr (code, ppc_r0); ppc_blrl (code); @@ -3370,10 +4109,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) code = emit_reserve_param_area (cfg, code); ppc_mflr (code, ppc_r0); if (ppc_is_imm16 (spvar->inst_offset)) { - ppc_store_reg (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg); + ppc_stptr (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg); } else { ppc_load (code, ppc_r11, spvar->inst_offset); - ppc_store_reg_indexed (code, ppc_r0, ppc_r11, spvar->inst_basereg); + ppc_stptr_indexed (code, ppc_r0, ppc_r11, spvar->inst_basereg); } break; } @@ -3384,10 +4123,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) if (ins->sreg1 != ppc_r3) ppc_mr (code, ppc_r3, ins->sreg1); if (ppc_is_imm16 (spvar->inst_offset)) { - ppc_load_reg (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg); + ppc_ldptr (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg); } else { ppc_load (code, ppc_r11, spvar->inst_offset); - ppc_load_reg_indexed (code, ppc_r0, spvar->inst_basereg, ppc_r11); + ppc_ldptr_indexed (code, ppc_r0, spvar->inst_basereg, ppc_r11); } ppc_mtlr (code, ppc_r0); ppc_blr (code); @@ -3397,7 +4136,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region); g_assert (spvar->inst_basereg != ppc_sp); code = emit_unreserve_param_area (cfg, code); - ppc_load_reg (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg); + ppc_ldptr (code, ppc_r0, spvar->inst_offset, spvar->inst_basereg); ppc_mtlr (code, ppc_r0); ppc_blr (code); break; @@ -3410,22 +4149,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) ins->inst_c0 = code - cfg->native_code; break; case OP_BR: - if (ins->flags & MONO_INST_BRLABEL) { - /*if (ins->inst_i0->inst_c0) { - ppc_b (code, 0); - //x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0); - } else*/ { - mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0); - ppc_b (code, 0); - } - } else { - /*if (ins->inst_target_bb->native_offset) { - ppc_b (code, 0); - 
//x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); - } else*/ { - mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb); - ppc_b (code, 0); - } + /*if (ins->inst_target_bb->native_offset) { + ppc_b (code, 0); + //x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset); + } else*/ { + mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb); + ppc_b (code, 0); } break; case OP_BR_REG: @@ -3434,6 +4163,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_CEQ: case OP_ICEQ: + CASE_PPC64 (OP_LCEQ) ppc_li (code, ins->dreg, 0); ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 2); ppc_li (code, ins->dreg, 1); @@ -3442,6 +4172,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_CLT_UN: case OP_ICLT: case OP_ICLT_UN: + CASE_PPC64 (OP_LCLT) + CASE_PPC64 (OP_LCLT_UN) ppc_li (code, ins->dreg, 1); ppc_bc (code, PPC_BR_TRUE, PPC_BR_LT, 2); ppc_li (code, ins->dreg, 0); @@ -3450,6 +4182,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_CGT_UN: case OP_ICGT: case OP_ICGT_UN: + CASE_PPC64 (OP_LCGT) + CASE_PPC64 (OP_LCGT_UN) ppc_li (code, ins->dreg, 1); ppc_bc (code, PPC_BR_TRUE, PPC_BR_GT, 2); ppc_li (code, ins->dreg, 0); @@ -3478,21 +4212,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_COND_EXC_ILE_UN: EMIT_COND_SYSTEM_EXCEPTION (ins->opcode - OP_COND_EXC_IEQ, ins->inst_p1); break; - case OP_COND_EXC_C: - /* check XER [0-3] (SO, OV, CA): we can't use mcrxr - */ - /*ppc_mfspr (code, ppc_r0, ppc_xer); - ppc_andisd (code, ppc_r0, ppc_r0, (1<<14)); - EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, "OverflowException"); - break;*/ - case OP_COND_EXC_OV: - /*ppc_mcrxr (code, 0); - EMIT_COND_SYSTEM_EXCEPTION (CEE_BGT - CEE_BEQ, ins->inst_p1); - break;*/ - case OP_COND_EXC_NC: - case OP_COND_EXC_NO: - g_assert_not_reached (); - break; case OP_IBEQ: case OP_IBNE_UN: case OP_IBLT: @@ -3508,22 +4227,43 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) /* floating point opcodes */ case OP_R8CONST: + g_assert (cfg->compile_aot); + + /* FIXME: Optimize this */ + ppc_bl (code, 1); + ppc_mflr (code, ppc_r11); + ppc_b (code, 3); + *(double*)code = *(double*)ins->inst_p0; + code += 8; + ppc_lfd (code, ins->dreg, 8, ppc_r11); + break; case OP_R4CONST: g_assert_not_reached (); + break; case OP_STORER8_MEMBASE_REG: if (ppc_is_imm16 (ins->inst_offset)) { ppc_stfd (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg); } else { - ppc_load (code, ppc_r0, ins->inst_offset); - ppc_stfdx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + if (ppc_is_imm32 (ins->inst_offset)) { + ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset)); + ppc_stfd (code, ins->sreg1, ins->inst_offset, ppc_r12); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_stfdx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + } } break; case OP_LOADR8_MEMBASE: if (ppc_is_imm16 (ins->inst_offset)) { ppc_lfd (code, ins->dreg, ins->inst_offset, ins->inst_basereg); } else { - ppc_load (code, ppc_r0, ins->inst_offset); - ppc_lfdx (code, ins->dreg, ins->inst_destbasereg, ppc_r0); + if (ppc_is_imm32 (ins->inst_offset)) { + ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset)); + ppc_lfd (code, ins->dreg, ins->inst_offset, ppc_r12); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_lfdx (code, ins->dreg, ins->inst_destbasereg, ppc_r0); + } } break; case 
OP_STORER4_MEMBASE_REG: @@ -3531,30 +4271,40 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) if (ppc_is_imm16 (ins->inst_offset)) { ppc_stfs (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg); } else { - ppc_load (code, ppc_r0, ins->inst_offset); - ppc_stfsx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + if (ppc_is_imm32 (ins->inst_offset)) { + ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset)); + ppc_stfs (code, ins->sreg1, ins->inst_offset, ppc_r12); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_stfsx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + } } break; case OP_LOADR4_MEMBASE: if (ppc_is_imm16 (ins->inst_offset)) { ppc_lfs (code, ins->dreg, ins->inst_offset, ins->inst_basereg); } else { - ppc_load (code, ppc_r0, ins->inst_offset); - ppc_lfsx (code, ins->dreg, ins->inst_destbasereg, ppc_r0); + if (ppc_is_imm32 (ins->inst_offset)) { + ppc_addis (code, ppc_r12, ins->inst_destbasereg, ppc_ha(ins->inst_offset)); + ppc_lfs (code, ins->dreg, ins->inst_offset, ppc_r12); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_lfsx (code, ins->dreg, ins->inst_destbasereg, ppc_r0); + } } break; case OP_LOADR4_MEMINDEX: - ppc_lfsx (code, ins->dreg, ins->sreg2, ins->inst_basereg); + ppc_lfsx (code, ins->dreg, ins->inst_basereg, ins->sreg2); break; case OP_LOADR8_MEMINDEX: - ppc_lfdx (code, ins->dreg, ins->sreg2, ins->inst_basereg); + ppc_lfdx (code, ins->dreg, ins->inst_basereg, ins->sreg2); break; case OP_STORER4_MEMINDEX: ppc_frsp (code, ins->sreg1, ins->sreg1); - ppc_stfsx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg); + ppc_stfsx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2); break; case OP_STORER8_MEMINDEX: - ppc_stfdx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg); + ppc_stfdx (code, ins->sreg1, ins->inst_destbasereg, ins->sreg2); break; case CEE_CONV_R_UN: case CEE_CONV_R4: /* FIXME: change precision */ @@ -3580,17 +4330,15 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_FCONV_TO_U: code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE); break; - case OP_FCONV_TO_I8: - case OP_FCONV_TO_U8: - g_assert_not_reached (); - /* Implemented as helper calls */ - break; case OP_LCONV_TO_R_UN: g_assert_not_reached (); /* Implemented as helper calls */ break; case OP_LCONV_TO_OVF_I4_2: case OP_LCONV_TO_OVF_I: { +#ifdef __mono_ppc64__ + NOT_IMPLEMENTED; +#else guint8 *negative_branch, *msword_positive_branch, *msword_negative_branch, *ovf_ex_target; // Check if its negative ppc_cmpi (code, 0, 0, ins->sreg1, 0); @@ -3614,6 +4362,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) if (ins->dreg != ins->sreg1) ppc_mr (code, ins->dreg, ins->sreg1); break; +#endif } case OP_SQRT: ppc_fsqrtd (code, ins->dreg, ins->sreg1); @@ -3718,54 +4467,240 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_JUMP_TABLE: mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0); - ppc_load (code, ins->dreg, 0x0f0f0f0f); +#ifdef __mono_ppc64__ + ppc_load_sequence (code, ins->dreg, (guint64)0x0f0f0f0f0f0f0f0fLL); +#else + ppc_load_sequence (code, ins->dreg, (gulong)0x0f0f0f0fL); +#endif break; } - default: - g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__); - g_assert_not_reached (); - } - if ((cfg->opt & MONO_OPT_BRANCH) && ((code - cfg->native_code - offset) > max_len)) { - g_warning ("wrong maximal instruction length of instruction %s (expected %d, got 
%ld)", - mono_inst_name (ins->opcode), max_len, (glong)(code - cfg->native_code - offset)); - g_assert_not_reached (); +#ifdef __mono_ppc64__ + case OP_ICONV_TO_I4: + case OP_SEXT_I4: + ppc_extsw (code, ins->dreg, ins->sreg1); + break; + case OP_ICONV_TO_U4: + case OP_ZEXT_I4: + ppc_clrldi (code, ins->dreg, ins->sreg1, 32); + break; + case OP_ICONV_TO_R4: + case OP_ICONV_TO_R8: + case OP_LCONV_TO_R4: + case OP_LCONV_TO_R8: { + int tmp; + if (ins->opcode == OP_ICONV_TO_R4 || ins->opcode == OP_ICONV_TO_R8) { + ppc_extsw (code, ppc_r0, ins->sreg1); + tmp = ppc_r0; + } else { + tmp = ins->sreg1; + } + if (cpu_hw_caps & PPC_MOVE_FPR_GPR) { + ppc_mffgpr (code, ins->dreg, tmp); + } else { + ppc_str (code, tmp, -8, ppc_r1); + ppc_lfd (code, ins->dreg, -8, ppc_r1); + } + ppc_fcfid (code, ins->dreg, ins->dreg); + if (ins->opcode == OP_ICONV_TO_R4 || ins->opcode == OP_LCONV_TO_R4) + ppc_frsp (code, ins->dreg, ins->dreg); + break; } - - cpos += max_len; - - last_ins = ins; - last_offset = offset; - } - - cfg->code_len = code - cfg->native_code; -} - -void -mono_arch_register_lowlevel_calls (void) -{ + case OP_LSHR: + ppc_srad (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_LSHR_UN: + ppc_srd (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_COND_EXC_C: + /* check XER [0-3] (SO, OV, CA): we can't use mcrxr + */ + ppc_mfspr (code, ppc_r0, ppc_xer); + ppc_andisd (code, ppc_r0, ppc_r0, (1 << 13)); /* CA */ + EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, ins->inst_p1); + break; + case OP_COND_EXC_OV: + ppc_mfspr (code, ppc_r0, ppc_xer); + ppc_andisd (code, ppc_r0, ppc_r0, (1 << 14)); /* OV */ + EMIT_COND_SYSTEM_EXCEPTION_FLAGS (PPC_BR_FALSE, PPC_BR_EQ, ins->inst_p1); + break; + case OP_LBEQ: + case OP_LBNE_UN: + case OP_LBLT: + case OP_LBLT_UN: + case OP_LBGT: + case OP_LBGT_UN: + case OP_LBGE: + case OP_LBGE_UN: + case OP_LBLE: + case OP_LBLE_UN: + EMIT_COND_BRANCH (ins, ins->opcode - OP_LBEQ); + break; + case OP_FCONV_TO_I8: + code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE); + break; + case OP_FCONV_TO_U8: + code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, FALSE); + break; + case OP_STOREI4_MEMBASE_REG: + if (ppc_is_imm16 (ins->inst_offset)) { + ppc_stw (code, ins->sreg1, ins->inst_offset, ins->inst_destbasereg); + } else { + ppc_load (code, ppc_r0, ins->inst_offset); + ppc_stwx (code, ins->sreg1, ins->inst_destbasereg, ppc_r0); + } + break; + case OP_STOREI4_MEMINDEX: + ppc_stwx (code, ins->sreg1, ins->sreg2, ins->inst_destbasereg); + break; + case OP_ISHR_IMM: + ppc_srawi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f)); + break; + case OP_ISHR_UN_IMM: + if (ins->inst_imm & 0x1f) + ppc_srwi (code, ins->dreg, ins->sreg1, (ins->inst_imm & 0x1f)); + else + ppc_mr (code, ins->dreg, ins->sreg1); + break; + case OP_ATOMIC_ADD_NEW_I4: + case OP_ATOMIC_ADD_NEW_I8: { + guint8 *loop = code, *branch; + g_assert (ins->inst_offset == 0); + if (ins->opcode == OP_ATOMIC_ADD_NEW_I4) + ppc_lwarx (code, ppc_r0, 0, ins->inst_basereg); + else + ppc_ldarx (code, ppc_r0, 0, ins->inst_basereg); + ppc_add (code, ppc_r0, ppc_r0, ins->sreg2); + if (ins->opcode == OP_ATOMIC_ADD_NEW_I4) + ppc_stwcxd (code, ppc_r0, 0, ins->inst_basereg); + else + ppc_stdcxd (code, ppc_r0, 0, ins->inst_basereg); + branch = code; + ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0); + ppc_patch (branch, loop); + ppc_mr (code, ins->dreg, ppc_r0); + break; + } +#else + case OP_ICONV_TO_R4: + case OP_ICONV_TO_R8: { + if (cpu_hw_caps & PPC_ISA_64) { + ppc_srawi(code, ppc_r0, 
ins->sreg1, 31); + ppc_stw (code, ppc_r0, -8, ppc_r1); + ppc_stw (code, ins->sreg1, -4, ppc_r1); + ppc_lfd (code, ins->dreg, -8, ppc_r1); + ppc_fcfid (code, ins->dreg, ins->dreg); + if (ins->opcode == OP_ICONV_TO_R4) + ppc_frsp (code, ins->dreg, ins->dreg); + } + break; + } +#endif + case OP_ATOMIC_CAS_I4: + CASE_PPC64 (OP_ATOMIC_CAS_I8) { + int location = ins->sreg1; + int value = ins->sreg2; + int comparand = ins->sreg3; + guint8 *start, *not_equal, *lost_reservation; + + start = code; + if (ins->opcode == OP_ATOMIC_CAS_I4) + ppc_lwarx (code, ppc_r0, 0, location); +#ifdef __mono_ppc64__ + else + ppc_ldarx (code, ppc_r0, 0, location); +#endif + ppc_cmp (code, 0, ins->opcode == OP_ATOMIC_CAS_I4 ? 0 : 1, ppc_r0, comparand); + + not_equal = code; + ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0); + if (ins->opcode == OP_ATOMIC_CAS_I4) + ppc_stwcxd (code, value, 0, location); +#ifdef __mono_ppc64__ + else + ppc_stdcxd (code, value, 0, location); +#endif + + lost_reservation = code; + ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0); + ppc_patch (lost_reservation, start); + + ppc_patch (not_equal, code); + ppc_mr (code, ins->dreg, ppc_r0); + break; + } + + default: + g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__); + g_assert_not_reached (); + } + + if ((cfg->opt & MONO_OPT_BRANCH) && ((code - cfg->native_code - offset) > max_len)) { + g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)", + mono_inst_name (ins->opcode), max_len, (glong)(code - cfg->native_code - offset)); + g_assert_not_reached (); + } + + cpos += max_len; + + last_ins = ins; + last_offset = offset; + } + + cfg->code_len = code - cfg->native_code; +} + +void +mono_arch_register_lowlevel_calls (void) +{ + /* The signature doesn't matter */ + mono_register_jit_icall (mono_ppc_throw_exception, "mono_ppc_throw_exception", mono_create_icall_signature ("void"), TRUE); } -#define patch_lis_ori(ip,val) do {\ +#ifdef __mono_ppc64__ +#define patch_load_sequence(ip,val) do {\ + guint16 *__load = (guint16*)(ip); \ + g_assert (sizeof (val) == sizeof (gsize)); \ + __load [1] = (((guint64)(gsize)(val)) >> 48) & 0xffff; \ + __load [3] = (((guint64)(gsize)(val)) >> 32) & 0xffff; \ + __load [7] = (((guint64)(gsize)(val)) >> 16) & 0xffff; \ + __load [9] = ((guint64)(gsize)(val)) & 0xffff; \ + } while (0) +#else +#define patch_load_sequence(ip,val) do {\ guint16 *__lis_ori = (guint16*)(ip); \ __lis_ori [1] = (((gulong)(val)) >> 16) & 0xffff; \ __lis_ori [3] = ((gulong)(val)) & 0xffff; \ } while (0) +#endif void mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors) { MonoJumpInfo *patch_info; + gboolean compile_aot = !run_cctors; for (patch_info = ji; patch_info; patch_info = patch_info->next) { unsigned char *ip = patch_info->ip.i + code; unsigned char *target; + gboolean is_fd = FALSE; target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors); + if (compile_aot) { + switch (patch_info->type) { + case MONO_PATCH_INFO_BB: + case MONO_PATCH_INFO_LABEL: + break; + default: + /* No need to patch these */ + continue; + } + } + switch (patch_info->type) { case MONO_PATCH_INFO_IP: - patch_lis_ori (ip, ip); + patch_load_sequence (ip, ip); continue; case MONO_PATCH_INFO_METHOD_REL: g_assert_not_reached (); @@ -3775,7 +4710,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono gpointer *table = (gpointer *)patch_info->data.table->table; int i; - patch_lis_ori (ip, table); + 
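/*
 * Aside on the patch_load_sequence () macro defined above: instructions are
 * stored big-endian (see the GUINT32_TO_BE store near the top of this hunk),
 * so viewing the sequence as a guint16 array puts each instruction's 16-bit
 * immediate field at the odd halfword index.  For the 64-bit
 * lis/ori/sldi/oris/ori sequence that gives indices 1, 3, 7 and 9 (index 5
 * belongs to the sldi, which has no 16-bit immediate to patch), holding bits
 * 63-48, 47-32, 31-16 and 15-0 of the value; the 32-bit lis/ori variant uses
 * indices 1 and 3.  The standalone sketch below just restates that split.
 */
#include <stdint.h>

static void
sketch_split_imm64 (uint64_t val, uint16_t out[4])
{
	out [0] = (val >> 48) & 0xffff;	/* lis  rD,out[0]     (halfword index 1) */
	out [1] = (val >> 32) & 0xffff;	/* ori  rD,rD,out[1]  (halfword index 3) */
	/* sldi rD,rD,32 carries no immediate halfword to patch (indices 4/5) */
	out [2] = (val >> 16) & 0xffff;	/* oris rD,rD,out[2]  (halfword index 7) */
	out [3] = val & 0xffff;		/* ori  rD,rD,out[3]  (halfword index 9) */
}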
patch_load_sequence (ip, table); for (i = 0; i < patch_info->data.table->table_size; i++) { table [i] = (glong)patch_info->data.table->table [i] + code; @@ -3794,7 +4729,7 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono case MONO_PATCH_INFO_TYPE_FROM_HANDLE: case MONO_PATCH_INFO_LDTOKEN: /* from OP_AOTCONST : lis + ori */ - patch_lis_ori (ip, target); + patch_load_sequence (ip, target); continue; case MONO_PATCH_INFO_R4: case MONO_PATCH_INFO_R8: @@ -3810,10 +4745,18 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono case MONO_PATCH_INFO_EXC_OVF: /* everything is dealt with at epilog output time */ continue; +#ifdef PPC_USES_FUNCTION_DESCRIPTOR + case MONO_PATCH_INFO_INTERNAL_METHOD: + case MONO_PATCH_INFO_ABS: + case MONO_PATCH_INFO_CLASS_INIT: + case MONO_PATCH_INFO_RGCTX_FETCH: + is_fd = TRUE; + break; +#endif default: break; } - ppc_patch (ip, target); + ppc_patch_full (ip, target, is_fd); } } @@ -3823,22 +4766,24 @@ mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, Mono * the instruction offset immediate for all the registers. */ static guint8* -save_registers (guint8* code, int pos, int base_reg, gboolean save_lmf, guint32 used_int_regs) +save_registers (MonoCompile *cfg, guint8* code, int pos, int base_reg, gboolean save_lmf, guint32 used_int_regs, int cfa_offset) { int i; if (!save_lmf) { for (i = 13; i <= 31; i++) { if (used_int_regs & (1 << i)) { - ppc_stw (code, i, pos, base_reg); - pos += sizeof (gulong); + ppc_str (code, i, pos, base_reg); + mono_emit_unwind_op_offset (cfg, code, i, pos - cfa_offset); + pos += sizeof (mgreg_t); } } } else { /* pos is the start of the MonoLMF structure */ int offset = pos + G_STRUCT_OFFSET (MonoLMF, iregs); for (i = 13; i <= 31; i++) { - ppc_stw (code, i, offset, base_reg); - offset += sizeof (gulong); + ppc_str (code, i, offset, base_reg); + mono_emit_unwind_op_offset (cfg, code, i, offset - cfa_offset); + offset += sizeof (mgreg_t); } offset = pos + G_STRUCT_OFFSET (MonoLMF, fregs); for (i = 14; i < 32; i++) { @@ -3874,7 +4819,8 @@ mono_arch_emit_prolog (MonoCompile *cfg) MonoBasicBlock *bb; MonoMethodSignature *sig; MonoInst *inst; - int alloc_size, pos, max_offset, i; + long alloc_size, pos, max_offset, cfa_offset; + int i; guint8 *code; CallInfo *cinfo; int tracing = 0; @@ -3885,12 +4831,18 @@ mono_arch_emit_prolog (MonoCompile *cfg) tracing = 1; sig = mono_method_signature (method); - cfg->code_size = 256 + sig->param_count * 20; + cfg->code_size = MONO_PPC_32_64_CASE (260, 384) + sig->param_count * 20; code = cfg->native_code = g_malloc (cfg->code_size); + cfa_offset = 0; + + /* We currently emit unwind info for aot, but don't use it */ + mono_emit_unwind_op_def_cfa (cfg, code, ppc_r1, 0); + if (1 || cfg->flags & MONO_CFG_HAS_CALLS) { ppc_mflr (code, ppc_r0); - ppc_store_reg (code, ppc_r0, PPC_RET_ADDR_OFFSET, ppc_sp); + ppc_str (code, ppc_r0, PPC_RET_ADDR_OFFSET, ppc_sp); + mono_emit_unwind_op_offset (cfg, code, ppc_lr, PPC_RET_ADDR_OFFSET); } alloc_size = cfg->stack_offset; @@ -3899,7 +4851,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) if (!method->save_lmf) { for (i = 31; i >= 13; --i) { if (cfg->used_int_regs & (1 << i)) { - pos += sizeof (gulong); + pos += sizeof (mgreg_t); } } } else { @@ -3917,25 +4869,31 @@ mono_arch_emit_prolog (MonoCompile *cfg) g_assert ((alloc_size & (MONO_ARCH_FRAME_ALIGNMENT-1)) == 0); if (alloc_size) { if (ppc_is_imm16 (-alloc_size)) { - ppc_store_reg_update (code, ppc_sp, -alloc_size, ppc_sp); - code = 
save_registers (code, alloc_size - pos, ppc_sp, method->save_lmf, cfg->used_int_regs); + ppc_str_update (code, ppc_sp, -alloc_size, ppc_sp); + cfa_offset = alloc_size; + mono_emit_unwind_op_def_cfa_offset (cfg, code, alloc_size); + code = save_registers (cfg, code, alloc_size - pos, ppc_sp, method->save_lmf, cfg->used_int_regs, cfa_offset); } else { if (pos) ppc_addi (code, ppc_r11, ppc_sp, -pos); ppc_load (code, ppc_r0, -alloc_size); - ppc_stwux (code, ppc_sp, ppc_sp, ppc_r0); - code = save_registers (code, 0, ppc_r11, method->save_lmf, cfg->used_int_regs); + ppc_str_update_indexed (code, ppc_sp, ppc_sp, ppc_r0); + cfa_offset = alloc_size; + mono_emit_unwind_op_def_cfa_offset (cfg, code, alloc_size); + code = save_registers (cfg, code, 0, ppc_r11, method->save_lmf, cfg->used_int_regs, cfa_offset); } } - if (cfg->frame_reg != ppc_sp) + if (cfg->frame_reg != ppc_sp) { ppc_mr (code, cfg->frame_reg, ppc_sp); + mono_emit_unwind_op_def_cfa_reg (cfg, code, cfg->frame_reg); + } /* store runtime generic context */ if (cfg->rgctx_var) { g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && (cfg->rgctx_var->inst_basereg == ppc_r1 || cfg->rgctx_var->inst_basereg == ppc_r31)); - ppc_store_reg (code, MONO_ARCH_RGCTX_REG, cfg->rgctx_var->inst_offset, cfg->rgctx_var->inst_basereg); + ppc_stptr (code, MONO_ARCH_RGCTX_REG, cfg->rgctx_var->inst_offset, cfg->rgctx_var->inst_basereg); } /* compute max_offset in order to use short forward jumps @@ -3966,10 +4924,10 @@ mono_arch_emit_prolog (MonoCompile *cfg) g_assert (inst); if (ppc_is_imm16 (inst->inst_offset)) { - ppc_store_reg (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); + ppc_stptr (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); } else { ppc_load (code, ppc_r11, inst->inst_offset); - ppc_store_reg_indexed (code, ainfo->reg, ppc_r11, inst->inst_basereg); + ppc_stptr_indexed (code, ainfo->reg, ppc_r11, inst->inst_basereg); } } @@ -3986,13 +4944,13 @@ mono_arch_emit_prolog (MonoCompile *cfg) else if (ainfo->regtype == RegTypeFP) ppc_fmr (code, inst->dreg, ainfo->reg); else if (ainfo->regtype == RegTypeBase) { - ppc_load_reg (code, ppc_r11, 0, ppc_sp); - ppc_load_reg (code, inst->dreg, ainfo->offset, ppc_r11); + ppc_ldr (code, ppc_r11, 0, ppc_sp); + ppc_ldptr (code, inst->dreg, ainfo->offset, ppc_r11); } else g_assert_not_reached (); if (cfg->verbose_level > 2) - g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg)); + g_print ("Argument %ld assigned to register %s\n", pos, mono_arch_regname (inst->dreg)); } else { /* the argument should be put on the stack: FIXME handle size != word */ if (ainfo->regtype == RegTypeGeneral) { @@ -4001,75 +4959,159 @@ mono_arch_emit_prolog (MonoCompile *cfg) if (ppc_is_imm16 (inst->inst_offset)) { ppc_stb (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); } else { - ppc_load (code, ppc_r11, inst->inst_offset); - ppc_stbx (code, ainfo->reg, ppc_r11, inst->inst_basereg); + if (ppc_is_imm32 (inst->inst_offset)) { + ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset)); + ppc_stb (code, ainfo->reg, ppc_r11, inst->inst_offset); + } else { + ppc_load (code, ppc_r11, inst->inst_offset); + ppc_stbx (code, ainfo->reg, inst->inst_basereg, ppc_r11); + } } break; case 2: if (ppc_is_imm16 (inst->inst_offset)) { ppc_sth (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); + } else { + if (ppc_is_imm32 (inst->inst_offset)) { + ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset)); + ppc_sth (code, ainfo->reg, ppc_r11, 
inst->inst_offset); + } else { + ppc_load (code, ppc_r11, inst->inst_offset); + ppc_sthx (code, ainfo->reg, inst->inst_basereg, ppc_r11); + } + } + break; +#ifdef __mono_ppc64__ + case 4: + if (ppc_is_imm16 (inst->inst_offset)) { + ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); + } else { + if (ppc_is_imm32 (inst->inst_offset)) { + ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset)); + ppc_stw (code, ainfo->reg, ppc_r11, inst->inst_offset); + } else { + ppc_load (code, ppc_r11, inst->inst_offset); + ppc_stwx (code, ainfo->reg, inst->inst_basereg, ppc_r11); + } + } + break; + case 8: + if (ppc_is_imm16 (inst->inst_offset)) { + ppc_str (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); } else { ppc_load (code, ppc_r11, inst->inst_offset); - ppc_sthx (code, ainfo->reg, ppc_r11, inst->inst_basereg); + ppc_str_indexed (code, ainfo->reg, ppc_r11, inst->inst_basereg); } break; +#else case 8: if (ppc_is_imm16 (inst->inst_offset + 4)) { ppc_stw (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); ppc_stw (code, ainfo->reg + 1, inst->inst_offset + 4, inst->inst_basereg); } else { - ppc_load (code, ppc_r11, inst->inst_offset); - ppc_add (code, ppc_r11, ppc_r11, inst->inst_basereg); + ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset)); + ppc_addi (code, ppc_r11, ppc_r11, inst->inst_offset); ppc_stw (code, ainfo->reg, 0, ppc_r11); ppc_stw (code, ainfo->reg + 1, 4, ppc_r11); } break; +#endif default: if (ppc_is_imm16 (inst->inst_offset)) { - ppc_store_reg (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); + ppc_stptr (code, ainfo->reg, inst->inst_offset, inst->inst_basereg); } else { - ppc_load (code, ppc_r11, inst->inst_offset); - ppc_store_reg_indexed (code, ainfo->reg, ppc_r11, inst->inst_basereg); + if (ppc_is_imm32 (inst->inst_offset)) { + ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset)); + ppc_stptr (code, ainfo->reg, ppc_r11, inst->inst_offset); + } else { + ppc_load (code, ppc_r11, inst->inst_offset); + ppc_stptr_indexed (code, ainfo->reg, inst->inst_basereg, ppc_r11); + } } break; } } else if (ainfo->regtype == RegTypeBase) { + g_assert (ppc_is_imm16 (ainfo->offset)); /* load the previous stack pointer in r11 */ - ppc_load_reg (code, ppc_r11, 0, ppc_sp); - ppc_load_reg (code, ppc_r0, ainfo->offset, ppc_r11); + ppc_ldr (code, ppc_r11, 0, ppc_sp); + ppc_ldptr (code, ppc_r0, ainfo->offset, ppc_r11); switch (ainfo->size) { case 1: if (ppc_is_imm16 (inst->inst_offset)) { ppc_stb (code, ppc_r0, inst->inst_offset, inst->inst_basereg); } else { - ppc_load (code, ppc_r11, inst->inst_offset); - ppc_stbx (code, ppc_r0, ppc_r11, inst->inst_basereg); + if (ppc_is_imm32 (inst->inst_offset)) { + ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset)); + ppc_stb (code, ppc_r0, ppc_r11, inst->inst_offset); + } else { + ppc_load (code, ppc_r11, inst->inst_offset); + ppc_stbx (code, ppc_r0, inst->inst_basereg, ppc_r11); + } } break; case 2: if (ppc_is_imm16 (inst->inst_offset)) { ppc_sth (code, ppc_r0, inst->inst_offset, inst->inst_basereg); + } else { + if (ppc_is_imm32 (inst->inst_offset)) { + ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset)); + ppc_sth (code, ppc_r0, ppc_r11, inst->inst_offset); + } else { + ppc_load (code, ppc_r11, inst->inst_offset); + ppc_sthx (code, ppc_r0, inst->inst_basereg, ppc_r11); + } + } + break; +#ifdef __mono_ppc64__ + case 4: + if (ppc_is_imm16 (inst->inst_offset)) { + ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg); + 
} else { + if (ppc_is_imm32 (inst->inst_offset)) { + ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset)); + ppc_stw (code, ppc_r0, ppc_r11, inst->inst_offset); + } else { + ppc_load (code, ppc_r11, inst->inst_offset); + ppc_stwx (code, ppc_r0, inst->inst_basereg, ppc_r11); + } + } + break; + case 8: + if (ppc_is_imm16 (inst->inst_offset)) { + ppc_str (code, ppc_r0, inst->inst_offset, inst->inst_basereg); } else { ppc_load (code, ppc_r11, inst->inst_offset); - ppc_sthx (code, ppc_r0, ppc_r11, inst->inst_basereg); + ppc_str_indexed (code, ppc_r0, ppc_r11, inst->inst_basereg); } break; +#else case 8: + g_assert (ppc_is_imm16 (ainfo->offset + 4)); if (ppc_is_imm16 (inst->inst_offset + 4)) { ppc_stw (code, ppc_r0, inst->inst_offset, inst->inst_basereg); ppc_lwz (code, ppc_r0, ainfo->offset + 4, ppc_r11); ppc_stw (code, ppc_r0, inst->inst_offset + 4, inst->inst_basereg); } else { - /* FIXME */ - g_assert_not_reached (); + /* use r12 to load the 2nd half of the long before we clobber r11. */ + ppc_lwz (code, ppc_r12, ainfo->offset + 4, ppc_r11); + ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset)); + ppc_addi (code, ppc_r11, ppc_r11, inst->inst_offset); + ppc_stw (code, ppc_r0, 0, ppc_r11); + ppc_stw (code, ppc_r12, 4, ppc_r11); } break; +#endif default: if (ppc_is_imm16 (inst->inst_offset)) { - ppc_store_reg (code, ppc_r0, inst->inst_offset, inst->inst_basereg); + ppc_stptr (code, ppc_r0, inst->inst_offset, inst->inst_basereg); } else { - ppc_load (code, ppc_r11, inst->inst_offset); - ppc_store_reg_indexed (code, ppc_r0, ppc_r11, inst->inst_basereg); + if (ppc_is_imm32 (inst->inst_offset)) { + ppc_addis (code, ppc_r11, inst->inst_basereg, ppc_ha(inst->inst_offset)); + ppc_stptr (code, ppc_r0, ppc_r11, inst->inst_offset); + } else { + ppc_load (code, ppc_r11, inst->inst_offset); + ppc_stptr_indexed (code, ppc_r0, inst->inst_basereg, ppc_r11); + } } break; } @@ -4087,11 +5129,11 @@ mono_arch_emit_prolog (MonoCompile *cfg) int cur_reg; int size = 0; g_assert (ppc_is_imm16 (inst->inst_offset)); - g_assert (ppc_is_imm16 (inst->inst_offset + ainfo->size * sizeof (gpointer))); + g_assert (ppc_is_imm16 (inst->inst_offset + ainfo->vtregs * sizeof (gpointer))); /* FIXME: what if there is no class? 
*/ if (sig->pinvoke && mono_class_from_mono_type (inst->inst_vtype)) size = mono_class_native_size (mono_class_from_mono_type (inst->inst_vtype), NULL); - for (cur_reg = 0; cur_reg < ainfo->size; ++cur_reg) { + for (cur_reg = 0; cur_reg < ainfo->vtregs; ++cur_reg) { #if __APPLE__ /* * Darwin handles 1 and 2 byte @@ -4106,14 +5148,30 @@ mono_arch_emit_prolog (MonoCompile *cfg) ppc_stb (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg); else #endif - ppc_stw (code, ainfo->reg + cur_reg, doffset, inst->inst_basereg); + { +#ifdef __mono_ppc64__ + if (ainfo->bytes) { + g_assert (cur_reg == 0); + ppc_sldi (code, ppc_r0, ainfo->reg, + (sizeof (gpointer) - ainfo->bytes) * 8); + ppc_stptr (code, ppc_r0, doffset, inst->inst_basereg); + } else +#endif + { + ppc_stptr (code, ainfo->reg + cur_reg, doffset, + inst->inst_basereg); + } + } soffset += sizeof (gpointer); doffset += sizeof (gpointer); } if (ainfo->vtsize) { + /* FIXME: we need to do the shifting here, too */ + if (ainfo->bytes) + NOT_IMPLEMENTED; /* load the previous stack pointer in r11 (r0 gets overwritten by the memcpy) */ - ppc_load_reg (code, ppc_r11, 0, ppc_sp); - if ((size & 3) != 0) { + ppc_ldr (code, ppc_r11, 0, ppc_sp); + if ((size & MONO_PPC_32_64_CASE (3, 7)) != 0) { code = emit_memcpy (code, size - soffset, inst->inst_basereg, doffset, ppc_r11, ainfo->offset + soffset); @@ -4127,8 +5185,8 @@ mono_arch_emit_prolog (MonoCompile *cfg) /* if it was originally a RegTypeBase */ if (ainfo->offset) { /* load the previous stack pointer in r11 */ - ppc_load_reg (code, ppc_r11, 0, ppc_sp); - ppc_load_reg (code, ppc_r11, ainfo->offset, ppc_r11); + ppc_ldr (code, ppc_r11, 0, ppc_sp); + ppc_ldptr (code, ppc_r11, ainfo->offset, ppc_r11); } else { ppc_mr (code, ppc_r11, ainfo->reg); } @@ -4137,7 +5195,7 @@ mono_arch_emit_prolog (MonoCompile *cfg) MonoInst *addr = cfg->tailcall_valuetype_addrs [tailcall_struct_index]; g_assert (ppc_is_imm16 (addr->inst_offset)); - ppc_store_reg (code, ppc_r11, addr->inst_offset, addr->inst_basereg); + ppc_stptr (code, ppc_r11, addr->inst_offset, addr->inst_basereg); tailcall_struct_index++; } @@ -4152,9 +5210,13 @@ mono_arch_emit_prolog (MonoCompile *cfg) } if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) { - ppc_load (code, ppc_r3, cfg->domain); + if (cfg->compile_aot) + /* AOT code is only used in the root domain */ + ppc_load_ptr (code, ppc_r3, 0); + else + ppc_load_ptr (code, ppc_r3, cfg->domain); mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach"); - if (FORCE_INDIR_CALL || cfg->method->dynamic) { + if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) { ppc_load_func (code, ppc_r0, 0); ppc_mtlr (code, ppc_r0); ppc_blrl (code); @@ -4166,12 +5228,16 @@ mono_arch_emit_prolog (MonoCompile *cfg) if (method->save_lmf) { if (lmf_pthread_key != -1) { emit_tls_access (code, ppc_r3, lmf_pthread_key); - if (G_STRUCT_OFFSET (MonoJitTlsData, lmf)) + if (tls_mode != TLS_MODE_NPTL && G_STRUCT_OFFSET (MonoJitTlsData, lmf)) ppc_addi (code, ppc_r3, ppc_r3, G_STRUCT_OFFSET (MonoJitTlsData, lmf)); } else { + if (cfg->compile_aot) { + /* Compute the got address which is needed by the PLT entry */ + code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL); + } mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr"); - if (FORCE_INDIR_CALL || cfg->method->dynamic) { + if ((FORCE_INDIR_CALL || cfg->method->dynamic) && !cfg->compile_aot) { ppc_load_func 
(code, ppc_r0, 0); ppc_mtlr (code, ppc_r0); ppc_blrl (code); @@ -4188,27 +5254,40 @@ mono_arch_emit_prolog (MonoCompile *cfg) */ ppc_addi (code, ppc_r11, ppc_sp, alloc_size - lmf_offset); /* ppc_r3 is the result from mono_get_lmf_addr () */ - ppc_store_reg (code, ppc_r3, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11); + ppc_stptr (code, ppc_r3, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11); /* new_lmf->previous_lmf = *lmf_addr */ - ppc_load_reg (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3); - ppc_store_reg (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r11); + ppc_ldptr (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3); + ppc_stptr (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r11); /* *(lmf_addr) = r11 */ - ppc_store_reg (code, ppc_r11, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3); + ppc_stptr (code, ppc_r11, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r3); /* save method info */ - ppc_load (code, ppc_r0, method); - ppc_store_reg (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, method), ppc_r11); - ppc_store_reg (code, ppc_sp, G_STRUCT_OFFSET(MonoLMF, ebp), ppc_r11); + if (cfg->compile_aot) + // FIXME: + ppc_load (code, ppc_r0, 0); + else + ppc_load_ptr (code, ppc_r0, method); + ppc_stptr (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, method), ppc_r11); + ppc_stptr (code, ppc_sp, G_STRUCT_OFFSET(MonoLMF, ebp), ppc_r11); /* save the current IP */ - mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_IP, NULL); - ppc_load_sequence (code, ppc_r0, 0x01010101); - ppc_store_reg (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, eip), ppc_r11); + if (cfg->compile_aot) { + ppc_bl (code, 1); + ppc_mflr (code, ppc_r0); + } else { + mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_IP, NULL); +#ifdef __mono_ppc64__ + ppc_load_sequence (code, ppc_r0, (guint64)0x0101010101010101LL); +#else + ppc_load_sequence (code, ppc_r0, (gulong)0x01010101L); +#endif + } + ppc_stptr (code, ppc_r0, G_STRUCT_OFFSET(MonoLMF, eip), ppc_r11); } if (tracing) code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE); cfg->code_len = code - cfg->native_code; - g_assert (cfg->code_len < cfg->code_size); + g_assert (cfg->code_len <= cfg->code_size); g_free (cinfo); return code; @@ -4255,16 +5334,16 @@ mono_arch_emit_epilog (MonoCompile *cfg) ppc_mr (code, ppc_r8, cfg->frame_reg); ppc_addi (code, ppc_r11, cfg->frame_reg, cfg->stack_usage - lmf_offset); /* r5 = previous_lmf */ - ppc_load_reg (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r11); + ppc_ldptr (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r11); /* r6 = lmf_addr */ - ppc_load_reg (code, ppc_r6, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11); + ppc_ldptr (code, ppc_r6, G_STRUCT_OFFSET(MonoLMF, lmf_addr), ppc_r11); /* *(lmf_addr) = previous_lmf */ - ppc_store_reg (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r6); + ppc_stptr (code, ppc_r5, G_STRUCT_OFFSET(MonoLMF, previous_lmf), ppc_r6); /* FIXME: speedup: there is no actual need to restore the registers if * we didn't actually change them (idea from Zoltan). 
*/ /* restore iregs */ - ppc_load_multiple_regs (code, ppc_r13, ppc_r11, G_STRUCT_OFFSET(MonoLMF, iregs)); + ppc_ldr_multiple (code, ppc_r13, G_STRUCT_OFFSET(MonoLMF, iregs), ppc_r11); /* restore fregs */ /*for (i = 14; i < 32; i++) { ppc_lfd (code, i, G_STRUCT_OFFSET(MonoLMF, fregs) + ((i-14) * sizeof (gdouble)), ppc_r11); @@ -4272,17 +5351,18 @@ mono_arch_emit_epilog (MonoCompile *cfg) g_assert (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)); /* use the saved copy of the frame reg in r8 */ if (1 || cfg->flags & MONO_CFG_HAS_CALLS) { - ppc_load_reg (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, ppc_r8); + ppc_ldr (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, ppc_r8); ppc_mtlr (code, ppc_r0); } ppc_addic (code, ppc_sp, ppc_r8, cfg->stack_usage); } else { if (1 || cfg->flags & MONO_CFG_HAS_CALLS) { - if (ppc_is_imm16 (cfg->stack_usage + PPC_RET_ADDR_OFFSET)) { - ppc_load_reg (code, ppc_r0, cfg->stack_usage + PPC_RET_ADDR_OFFSET, cfg->frame_reg); + long return_offset = cfg->stack_usage + PPC_RET_ADDR_OFFSET; + if (ppc_is_imm16 (return_offset)) { + ppc_ldr (code, ppc_r0, return_offset, cfg->frame_reg); } else { - ppc_load (code, ppc_r11, cfg->stack_usage + PPC_RET_ADDR_OFFSET); - ppc_load_reg_indexed (code, ppc_r0, cfg->frame_reg, ppc_r11); + ppc_load (code, ppc_r11, return_offset); + ppc_ldr_indexed (code, ppc_r0, cfg->frame_reg, ppc_r11); } ppc_mtlr (code, ppc_r0); } @@ -4290,29 +5370,29 @@ mono_arch_emit_epilog (MonoCompile *cfg) int offset = cfg->stack_usage; for (i = 13; i <= 31; i++) { if (cfg->used_int_regs & (1 << i)) - offset -= sizeof (gulong); + offset -= sizeof (mgreg_t); } if (cfg->frame_reg != ppc_sp) ppc_mr (code, ppc_r11, cfg->frame_reg); /* note r31 (possibly the frame register) is restored last */ for (i = 13; i <= 31; i++) { if (cfg->used_int_regs & (1 << i)) { - ppc_lwz (code, i, offset, cfg->frame_reg); - offset += sizeof (gulong); + ppc_ldr (code, i, offset, cfg->frame_reg); + offset += sizeof (mgreg_t); } } if (cfg->frame_reg != ppc_sp) - ppc_addic (code, ppc_sp, ppc_r11, cfg->stack_usage); + ppc_addi (code, ppc_sp, ppc_r11, cfg->stack_usage); else - ppc_addic (code, ppc_sp, ppc_sp, cfg->stack_usage); + ppc_addi (code, ppc_sp, ppc_sp, cfg->stack_usage); } else { - ppc_load (code, ppc_r11, cfg->stack_usage); + ppc_load32 (code, ppc_r11, cfg->stack_usage); if (cfg->used_int_regs) { ppc_add (code, ppc_r11, cfg->frame_reg, ppc_r11); for (i = 31; i >= 13; --i) { if (cfg->used_int_regs & (1 << i)) { - pos += sizeof (gulong); - ppc_lwz (code, i, -pos, ppc_r11); + pos += sizeof (mgreg_t); + ppc_ldr (code, i, -pos, ppc_r11); } } ppc_mr (code, ppc_sp, ppc_r11); @@ -4366,13 +5446,12 @@ mono_arch_emit_exceptions (MonoCompile *cfg) /* * make sure we have enough space for exceptions - * 24 is the simulated call to throw_exception_by_name */ for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) { if (patch_info->type == MONO_PATCH_INFO_EXC) { i = exception_id_by_name (patch_info->data.target); if (!exc_throw_found [i]) { - max_epilog_size += 24; + max_epilog_size += (2 * PPC_LOAD_SEQUENCE_LENGTH) + 5 * 4; exc_throw_found [i] = TRUE; } } else if (patch_info->type == MONO_PATCH_INFO_BB_OVF) @@ -4381,7 +5460,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg) MonoOvfJump *ovfj = (MonoOvfJump*)patch_info->data.target; i = exception_id_by_name (ovfj->data.exception); if (!exc_throw_found [i]) { - max_epilog_size += 24; + max_epilog_size += (2 * PPC_LOAD_SEQUENCE_LENGTH) + 5 * 4; exc_throw_found [i] = TRUE; } max_epilog_size += 8; @@ -4411,6 
+5490,7 @@ mono_arch_emit_exceptions (MonoCompile *cfg) ppc_b (code, 0); ip = ovfj->data.bb->native_offset + cfg->native_code; ppc_patch (code - 4, ip); + patch_info->type = MONO_PATCH_INFO_NONE; break; } case MONO_PATCH_INFO_EXC_OVF: { @@ -4431,9 +5511,12 @@ mono_arch_emit_exceptions (MonoCompile *cfg) newji->data.target = ovfj->data.exception; newji->next = patch_info->next; patch_info->next = newji; + patch_info->type = MONO_PATCH_INFO_NONE; break; } case MONO_PATCH_INFO_EXC: { + MonoClass *exc_class; + unsigned char *ip = patch_info->ip.i + cfg->native_code; i = exception_id_by_name (patch_info->data.target); if (exc_throw_pos [i]) { @@ -4443,19 +5526,24 @@ mono_arch_emit_exceptions (MonoCompile *cfg) } else { exc_throw_pos [i] = code; } + + exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name); + g_assert (exc_class); + ppc_patch (ip, code); /*mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC_NAME, patch_info->data.target);*/ - ppc_load (code, ppc_r3, patch_info->data.target); - /* we got here from a conditional call, so the calling ip is set in lr already */ + ppc_load (code, ppc_r3, exc_class->type_token); + /* we got here from a conditional call, so the calling ip is set in lr */ + ppc_mflr (code, ppc_r4); patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD; - patch_info->data.name = "mono_arch_throw_exception_by_name"; + patch_info->data.name = "mono_arch_throw_corlib_exception"; patch_info->ip.i = code - cfg->native_code; if (FORCE_INDIR_CALL || cfg->method->dynamic) { ppc_load_func (code, ppc_r0, 0); ppc_mtctr (code, ppc_r0); ppc_bcctr (code, PPC_BR_ALWAYS, 0); } else { - ppc_b (code, 0); + ppc_bl (code, 0); } break; } @@ -4467,10 +5555,10 @@ mono_arch_emit_exceptions (MonoCompile *cfg) cfg->code_len = code - cfg->native_code; - g_assert (cfg->code_len < cfg->code_size); - + g_assert (cfg->code_len <= cfg->code_size); } +#if DEAD_CODE static int try_offset_access (void *value, guint32 idx) { @@ -4484,22 +5572,41 @@ try_offset_access (void *value, guint32 idx) return 0; return 1; } +#endif static void setup_tls_access (void) { guint32 ptk; + +#if defined(__linux__) && defined(_CS_GNU_LIBPTHREAD_VERSION) + size_t conf_size = 0; + char confbuf[128]; +#else + /* FIXME for darwin */ guint32 *ins, *code; guint32 cmplwi_1023, li_0x48, blr_ins; +#endif + +#ifdef TARGET_PS3 + tls_mode = TLS_MODE_FAILED; +#endif + if (tls_mode == TLS_MODE_FAILED) return; - if (g_getenv ("MONO_NO_TLS")) { tls_mode = TLS_MODE_FAILED; return; } - if (tls_mode == TLS_MODE_DETECT) { + if (tls_mode == TLS_MODE_DETECT) { +#if defined(__APPLE__) && defined(__mono_ppc__) && !defined(__mono_ppc64__) + tls_mode = TLS_MODE_DARWIN_G4; +#elif defined(__linux__) && defined(_CS_GNU_LIBPTHREAD_VERSION) + conf_size = confstr ( _CS_GNU_LIBPTHREAD_VERSION, confbuf, sizeof(confbuf)); + if ((conf_size > 4) && (strncmp (confbuf, "NPTL", 4) == 0)) + tls_mode = TLS_MODE_NPTL; +#elif !defined(TARGET_PS3) ins = (guint32*)pthread_getspecific; /* uncond branch to the real method */ if ((*ins >> 26) == 18) { @@ -4508,7 +5615,7 @@ setup_tls_access (void) val >>= 6; if (*ins & 2) { /* absolute */ - ins = (guint32*)val; + ins = (guint32*)(long)val; } else { ins = (guint32*) ((char*)ins + val); } @@ -4544,7 +5651,7 @@ setup_tls_access (void) val >>= 6; if (*ins & 2) { /* absolute */ - ins = (guint32*)val; + ins = (guint32*)(long)val; } else { ins = (guint32*) ((char*)ins + val); } @@ -4571,7 +5678,18 @@ setup_tls_access (void) tls_mode = TLS_MODE_FAILED; return; } +#endif } +#ifndef 
TARGET_PS3 + if (tls_mode == TLS_MODE_DETECT) + tls_mode = TLS_MODE_FAILED; + if (tls_mode == TLS_MODE_FAILED) + return; + if ((monodomain_key == -1) && (tls_mode == TLS_MODE_NPTL)) { + monodomain_key = mono_domain_get_tls_offset(); + } + /* if not TLS_MODE_NPTL or local dynamic (as indicated by + mono_domain_get_tls_offset returning -1) then use keyed access. */ if (monodomain_key == -1) { ptk = mono_domain_get_tls_key (); if (ptk < 1024) { @@ -4581,6 +5699,12 @@ setup_tls_access (void) } } } + + if ((lmf_pthread_key == -1) && (tls_mode == TLS_MODE_NPTL)) { + lmf_pthread_key = mono_get_lmf_addr_tls_offset(); + } + /* if not TLS_MODE_NPTL or local dynamic (as indicated by + mono_get_lmf_addr_tls_offset returning -1) then use keyed access. */ if (lmf_pthread_key == -1) { ptk = mono_pthread_key_for_tls (mono_jit_tls_id); if (ptk < 1024) { @@ -4592,18 +5716,7 @@ setup_tls_access (void) lmf_pthread_key = ptk; } } - if (monothread_key == -1) { - ptk = mono_thread_get_tls_key (); - if (ptk < 1024) { - ptk = mono_pthread_key_for_tls (ptk); - if (ptk < 1024) { - monothread_key = ptk; - /*g_print ("thread inited: %d\n", ptk);*/ - } - } else { - /*g_print ("thread not inited yet %d\n", ptk);*/ - } - } +#endif } void @@ -4621,6 +5734,7 @@ mono_arch_free_jit_tls_data (MonoJitTlsData *tls) #define CMP_SIZE (PPC_LOAD_SEQUENCE_LENGTH + 4) #define BR_SIZE 4 +#define LOADSTORE_SIZE 4 #define JUMP_IMM_SIZE 12 #define JUMP_IMM32_SIZE (PPC_LOAD_SEQUENCE_LENGTH + 8) #define ENABLE_WRONG_METHOD_CHECK 0 @@ -4642,15 +5756,17 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI if (item->check_target_idx) { if (!item->compare_done) item->chunk_size += CMP_SIZE; - if (fail_tramp) + if (item->has_target_code) item->chunk_size += BR_SIZE + JUMP_IMM32_SIZE; else - item->chunk_size += BR_SIZE + JUMP_IMM_SIZE; + item->chunk_size += LOADSTORE_SIZE + BR_SIZE + JUMP_IMM_SIZE; } else { if (fail_tramp) { item->chunk_size += CMP_SIZE + BR_SIZE + JUMP_IMM32_SIZE * 2; + if (!item->has_target_code) + item->chunk_size += LOADSTORE_SIZE; } else { - item->chunk_size += JUMP_IMM_SIZE; + item->chunk_size += LOADSTORE_SIZE + JUMP_IMM_SIZE; #if ENABLE_WRONG_METHOD_CHECK item->chunk_size += CMP_SIZE + BR_SIZE + 4; #endif @@ -4666,27 +5782,39 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI code = mono_method_alloc_generic_virtual_thunk (domain, size); } else { /* the initial load of the vtable address */ - size += PPC_LOAD_SEQUENCE_LENGTH; - code = mono_code_manager_reserve (domain->code_mp, size); + size += PPC_LOAD_SEQUENCE_LENGTH + LOADSTORE_SIZE; + code = mono_domain_code_reserve (domain, size); } start = code; - if (!fail_tramp) - ppc_load (code, ppc_r11, (gulong)(& (vtable->vtable [0]))); + if (!fail_tramp) { + /* + * We need to save and restore r11 because it might be + * used by the caller as the vtable register, so + * clobbering it will trip up the magic trampoline. + * + * FIXME: Get rid of this by making sure that r11 is + * not used as the vtable register in interface calls. 
+ */ + ppc_stptr (code, ppc_r11, PPC_RET_ADDR_OFFSET, ppc_sp); + ppc_load (code, ppc_r11, (gsize)(& (vtable->vtable [0]))); + } for (i = 0; i < count; ++i) { MonoIMTCheckItem *item = imt_entries [i]; item->code_target = code; if (item->is_equals) { if (item->check_target_idx) { if (!item->compare_done) { - ppc_load (code, ppc_r0, (gulong)item->key); + ppc_load (code, ppc_r0, (gsize)item->key); ppc_compare_log (code, 0, MONO_ARCH_IMT_REG, ppc_r0); } item->jmp_code = code; ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0); - if (fail_tramp) - ppc_load (code, ppc_r0, item->value.target_code); - else - ppc_load_reg (code, ppc_r0, (sizeof (gpointer) * item->value.vtable_slot), ppc_r11); + if (item->has_target_code) { + ppc_load_ptr (code, ppc_r0, item->value.target_code); + } else { + ppc_ldptr (code, ppc_r0, (sizeof (gpointer) * item->value.vtable_slot), ppc_r11); + ppc_ldptr (code, ppc_r11, PPC_RET_ADDR_OFFSET, ppc_sp); + } ppc_mtctr (code, ppc_r0); ppc_bcctr (code, PPC_BR_ALWAYS, 0); } else { @@ -4695,11 +5823,17 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI ppc_compare_log (code, 0, MONO_ARCH_IMT_REG, ppc_r0); item->jmp_code = code; ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0); - ppc_load (code, ppc_r0, item->value.target_code); + if (item->has_target_code) { + ppc_load_ptr (code, ppc_r0, item->value.target_code); + } else { + g_assert (vtable); + ppc_load_ptr (code, ppc_r0, & (vtable->vtable [item->value.vtable_slot])); + ppc_ldptr_indexed (code, ppc_r0, 0, ppc_r0); + } ppc_mtctr (code, ppc_r0); ppc_bcctr (code, PPC_BR_ALWAYS, 0); ppc_patch (item->jmp_code, code); - ppc_load (code, ppc_r0, fail_tramp); + ppc_load_ptr (code, ppc_r0, fail_tramp); ppc_mtctr (code, ppc_r0); ppc_bcctr (code, PPC_BR_ALWAYS, 0); item->jmp_code = NULL; @@ -4711,7 +5845,8 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI item->jmp_code = code; ppc_bc (code, PPC_BR_FALSE, PPC_BR_EQ, 0); #endif - ppc_load_reg (code, ppc_r0, (sizeof (gpointer) * item->value.vtable_slot), ppc_r11); + ppc_ldptr (code, ppc_r0, (sizeof (gpointer) * item->value.vtable_slot), ppc_r11); + ppc_ldptr (code, ppc_r11, PPC_RET_ADDR_OFFSET, ppc_sp); ppc_mtctr (code, ppc_r0); ppc_bcctr (code, PPC_BR_ALWAYS, 0); #if ENABLE_WRONG_METHOD_CHECK @@ -4746,22 +5881,20 @@ mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckI } MonoMethod* -mono_arch_find_imt_method (gpointer *regs, guint8 *code) +mono_arch_find_imt_method (mgreg_t *regs, guint8 *code) { - return (MonoMethod*) regs [MONO_ARCH_IMT_REG]; -} + mgreg_t *r = (mgreg_t*)regs; -MonoObject* -mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx) -{ - return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), (gssize*)regs, NULL); + return (MonoMethod*)(gsize) r [MONO_ARCH_IMT_REG]; } #endif MonoVTable* -mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code) +mono_arch_find_static_call_vtable (mgreg_t *regs, guint8 *code) { - return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG]; + mgreg_t *r = (mgreg_t*)regs; + + return (MonoVTable*)(gsize) r [MONO_ARCH_RGCTX_REG]; } MonoInst* @@ -4790,24 +5923,238 @@ MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg) return ins; } -MonoInst* -mono_arch_get_thread_intrinsic (MonoCompile* cfg) +gpointer +mono_arch_context_get_int_reg (MonoContext *ctx, int reg) { - MonoInst* ins; + if (reg == ppc_r1) + return MONO_CONTEXT_GET_SP (ctx); - setup_tls_access (); - if (monothread_key == -1) - return NULL; + 
g_assert (reg >= ppc_r13);
+
+ return (gpointer)(gsize)ctx->regs [reg - ppc_r13];
+}
+
+guint32
+mono_arch_get_patch_offset (guint8 *code)
+{
+ return 0;
+}
+
+/*
+ * mono_arch_emit_load_got_addr:
+ *
+ * Emit code to load the GOT address.
+ * On PPC, the result is placed into r30.
+ */
+guint8*
+mono_arch_emit_load_got_addr (guint8 *start, guint8 *code, MonoCompile *cfg, MonoJumpInfo **ji)
+{
+ ppc_bl (code, 1);
+ ppc_mflr (code, ppc_r30);
+ if (cfg)
+ mono_add_patch_info (cfg, code - start, MONO_PATCH_INFO_GOT_OFFSET, NULL);
+ else
+ *ji = mono_patch_info_list_prepend (*ji, code - start, MONO_PATCH_INFO_GOT_OFFSET, NULL);
+ /* arch_emit_got_address () patches this */
+#if defined(TARGET_POWERPC64)
+ ppc_nop (code);
+ ppc_nop (code);
+ ppc_nop (code);
+ ppc_nop (code);
+#else
+ ppc_load32 (code, ppc_r0, 0);
+ ppc_add (code, ppc_r30, ppc_r30, ppc_r0);
+#endif
+
+ return code;
+}
+
+/*
+ * mono_arch_emit_load_aotconst:
+ *
+ * Emit code to load the contents of the GOT slot identified by TRAMP_TYPE and
+ * TARGET from the mscorlib GOT in full-aot code.
+ * On PPC, the GOT address is assumed to be in r30, and the result is placed into
+ * r11.
+ */
+guint8*
+mono_arch_emit_load_aotconst (guint8 *start, guint8 *code, MonoJumpInfo **ji, int tramp_type, gconstpointer target)
+{
+ /* Load the mscorlib got address */
+ ppc_ldptr (code, ppc_r11, sizeof (gpointer), ppc_r30);
+ *ji = mono_patch_info_list_prepend (*ji, code - start, tramp_type, target);
+ /* arch_emit_got_access () patches this */
+ ppc_load32 (code, ppc_r0, 0);
+ ppc_ldptr_indexed (code, ppc_r11, ppc_r11, ppc_r0);
+
+ return code;
+}
+
+/* Soft Debug support */
+#ifdef MONO_ARCH_SOFT_DEBUG_SUPPORTED
+
+/*
+ * BREAKPOINTS
+ */
+
+/*
+ * mono_arch_set_breakpoint:
+ *
+ * See mini-amd64.c for docs.
+ */
+void
+mono_arch_set_breakpoint (MonoJitInfo *ji, guint8 *ip)
+{
+ guint8 *code = ip;
+ guint8 *orig_code = code;
+
+ ppc_load_sequence (code, ppc_r11, (gsize)bp_trigger_page);
+ ppc_ldptr (code, ppc_r11, 0, ppc_r11);
+
+ g_assert (code - orig_code == BREAKPOINT_SIZE);
+
+ mono_arch_flush_icache (orig_code, code - orig_code);
+}
+
+/*
+ * mono_arch_clear_breakpoint:
+ *
+ * See mini-amd64.c for docs.
+ */
+void
+mono_arch_clear_breakpoint (MonoJitInfo *ji, guint8 *ip)
+{
+ guint8 *code = ip;
+ int i;
+
+ for (i = 0; i < BREAKPOINT_SIZE / 4; ++i)
+ ppc_nop (code);
+
+ mono_arch_flush_icache (ip, code - ip);
+}
+
+/*
+ * mono_arch_is_breakpoint_event:
+ *
+ * See mini-amd64.c for docs.
+ */
+gboolean
+mono_arch_is_breakpoint_event (void *info, void *sigctx)
+{
+ siginfo_t* sinfo = (siginfo_t*) info;
+ /* Sometimes the address is off by 4 */
+ if (sinfo->si_addr >= bp_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)bp_trigger_page + 128)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+/*
+ * mono_arch_get_ip_for_breakpoint:
+ *
+ * See mini-amd64.c for docs.
+ */
+guint8*
+mono_arch_get_ip_for_breakpoint (MonoJitInfo *ji, MonoContext *ctx)
+{
+ guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
+
+ /* ip points at the ldptr instruction */
+ ip -= PPC_LOAD_SEQUENCE_LENGTH;
+
+ return ip;
+}
+
+/*
+ * mono_arch_skip_breakpoint:
+ *
+ * See mini-amd64.c for docs.
+ */
+void
+mono_arch_skip_breakpoint (MonoContext *ctx)
+{
+ /* skip the ldptr */
+ MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 4);
+}
+
+/*
+ * SINGLE STEPPING
+ */
-
+/*
+ * mono_arch_start_single_stepping:
+ *
+ * See mini-amd64.c for docs.
+ */
+void
+mono_arch_start_single_stepping (void)
+{
+ mono_mprotect (ss_trigger_page, mono_pagesize (), 0);
+}
+
+/*
+ * mono_arch_stop_single_stepping:
+ *
+ * See mini-amd64.c for docs.
+ */
+void
+mono_arch_stop_single_stepping (void)
+{
+ mono_mprotect (ss_trigger_page, mono_pagesize (), MONO_MMAP_READ);
 }
-gpointer
-mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
+/*
+ * mono_arch_is_single_step_event:
+ *
+ * See mini-amd64.c for docs.
+ */
+gboolean
+mono_arch_is_single_step_event (void *info, void *sigctx)
 {
- g_assert (reg >= ppc_r13);
+ siginfo_t* sinfo = (siginfo_t*) info;
+ /* Sometimes the address is off by 4 */
+ if (sinfo->si_addr >= ss_trigger_page && (guint8*)sinfo->si_addr <= (guint8*)ss_trigger_page + 128)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+/*
+ * mono_arch_get_ip_for_single_step:
+ *
+ * See mini-amd64.c for docs.
+ */
+guint8*
+mono_arch_get_ip_for_single_step (MonoJitInfo *ji, MonoContext *ctx)
+{
+ guint8 *ip = MONO_CONTEXT_GET_IP (ctx);
- return (gpointer)ctx->regs [reg - ppc_r13];
+ /* ip points after the ldptr instruction */
+ return ip;
+}
+
+/*
+ * mono_arch_skip_single_step:
+ *
+ * See mini-amd64.c for docs.
+ */
+void
+mono_arch_skip_single_step (MonoContext *ctx)
+{
+ /* skip the ldptr */
+ MONO_CONTEXT_SET_IP (ctx, (guint8*)MONO_CONTEXT_GET_IP (ctx) + 4);
 }
+
+/*
+ * mono_arch_get_seq_point_info:
+ *
+ * See mini-amd64.c for docs.
+ */
+gpointer
+mono_arch_get_seq_point_info (MonoDomain *domain, guint8 *code)
+{
+ NOT_IMPLEMENTED;
+ return NULL;
+}
+
+#endif
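
The soft-debug code added above relies on a trigger-page trick: the JIT emits a load from ss_trigger_page at each sequence point (and from bp_trigger_page at each breakpoint site), so flipping the protection of that page turns an otherwise free load into a SIGSEGV that the runtime's handler can classify by faulting address. The stand-alone C sketch below is not part of the patch; it only illustrates the same mechanism with plain POSIX mmap/mprotect/sigaction instead of the mono_mprotect/mono_pagesize wrappers, and the names trigger_page, step_handler and page_size are invented for the example.

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static void *trigger_page;              /* stands in for ss_trigger_page */
static long page_size;
static volatile sig_atomic_t stepped;

/* Rough analogue of mono_arch_is_single_step_event: classify by fault address. */
static void
step_handler (int sig, siginfo_t *info, void *ucontext)
{
	(void) sig;
	(void) ucontext;
	if ((char *) info->si_addr >= (char *) trigger_page &&
	    (char *) info->si_addr < (char *) trigger_page + page_size) {
		stepped = 1;
		/* Make the page readable again so the faulting load can retry. */
		mprotect (trigger_page, page_size, PROT_READ);
	}
}

int
main (void)
{
	struct sigaction sa;
	volatile int scratch;

	page_size = sysconf (_SC_PAGESIZE);
	trigger_page = mmap (NULL, page_size, PROT_READ,
	                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (trigger_page == MAP_FAILED)
		return 1;

	memset (&sa, 0, sizeof (sa));
	sa.sa_sigaction = step_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction (SIGSEGV, &sa, NULL);

	/* While the page is readable, this "sequence point" costs one load. */
	scratch = *(volatile int *) trigger_page;

	/* Analogue of mono_arch_start_single_stepping: revoke access. */
	mprotect (trigger_page, page_size, PROT_NONE);

	/* The same load now faults and is reported as a single-step event. */
	scratch = *(volatile int *) trigger_page;

	(void) scratch;
	printf ("single-step event observed: %d\n", (int) stepped);
	return 0;
}

In the patch itself the cheap load is whatever the JIT emits against ss_trigger_page or bp_trigger_page, and enabling or disabling stepping is exactly the mprotect flip shown in mono_arch_start_single_stepping and mono_arch_stop_single_stepping above.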