X-Git-Url: http://wien.tomnetworks.com/gitweb/?a=blobdiff_plain;f=mono%2Fmini%2Faot-compiler.c;h=6f13e974c20ccf7feab18eea589ede91e123fbf4;hb=5b558abeeb255a3179d4ca6a85617e051c6abd38;hp=75dedbf19468438a670867d8a1272f6d53806853;hpb=00706612686733a2f0aa45f3d2aab9d3a0a38916;p=mono.git diff --git a/mono/mini/aot-compiler.c b/mono/mini/aot-compiler.c index 75dedbf1946..6f13e974c20 100644 --- a/mono/mini/aot-compiler.c +++ b/mono/mini/aot-compiler.c @@ -184,12 +184,14 @@ typedef struct MonoAotCompile { MonoClass **typespec_classes; GString *llc_args; GString *as_args; + gboolean thumb_mixed; } MonoAotCompile; typedef struct { int plt_offset; - char *symbol; + char *symbol, *llvm_symbol, *debug_sym; MonoJumpInfo *ji; + gboolean jit_used, llvm_used; } MonoPltEntry; #define mono_acfg_lock(acfg) EnterCriticalSection (&((acfg)->mutex)) @@ -376,6 +378,14 @@ emit_string_symbol (MonoAotCompile *acfg, const char *name, const char *value) img_writer_emit_string (acfg->w, value); } +static void +emit_local_string_symbol (MonoAotCompile *acfg, const char *name, const char *value) +{ + img_writer_emit_section_change (acfg->w, RODATA_SECT, 1); + img_writer_emit_label (acfg->w, name); + img_writer_emit_string (acfg->w, value); +} + static G_GNUC_UNUSED void emit_uleb128 (MonoAotCompile *acfg, guint32 value) { @@ -474,7 +484,7 @@ encode_sleb128 (gint32 value, guint8 *buf, guint8 **endbuf) #else #define AOT_FUNC_ALIGNMENT 16 #endif -#if defined(TARGET_X86) && defined(__native_client_codegen__) +#if (defined(TARGET_X86) || defined(TARGET_AMD64)) && defined(__native_client_codegen__) #undef AOT_FUNC_ALIGNMENT #define AOT_FUNC_ALIGNMENT 32 #endif @@ -513,6 +523,8 @@ arch_init (MonoAotCompile *acfg) g_string_append (acfg->llc_args, " -soft-float"); #endif } + if (acfg->aot_opts.mtriple && strstr (acfg->aot_opts.mtriple, "thumb")) + acfg->thumb_mixed = TRUE; if (acfg->aot_opts.mtriple) mono_arch_set_target (acfg->aot_opts.mtriple); @@ -686,8 +698,14 @@ arch_emit_plt_entry (MonoAotCompile 
*acfg, int index) { #if defined(TARGET_X86) guint32 offset = (acfg->plt_got_offset_base + index) * sizeof (gpointer); - -#ifdef __native_client_codegen__ +#if defined(__default_codegen__) + /* jmp *(%ebx) */ + emit_byte (acfg, 0xff); + emit_byte (acfg, 0xa3); + emit_int32 (acfg, offset); + /* Used by mono_aot_get_plt_info_offset */ + emit_int32 (acfg, acfg->plt_got_info_offsets [index]); +#elif defined(__native_client_codegen__) const guint8 kSizeOfNaClJmp = 11; guint8 bytes[kSizeOfNaClJmp]; guint8 *pbytes = &bytes[0]; @@ -699,15 +717,9 @@ arch_emit_plt_entry (MonoAotCompile *acfg, int index) emit_byte (acfg, 0x68); /* hide data in a push */ emit_int32 (acfg, acfg->plt_got_info_offsets [index]); emit_alignment (acfg, AOT_FUNC_ALIGNMENT); -#else - /* jmp *(%ebx) */ - emit_byte (acfg, 0xff); - emit_byte (acfg, 0xa3); - emit_int32 (acfg, offset); - /* Used by mono_aot_get_plt_info_offset */ - emit_int32 (acfg, acfg->plt_got_info_offsets [index]); -#endif /* __native_client_codegen__ */ +#endif /*__native_client_codegen__*/ #elif defined(TARGET_AMD64) +#if defined(__default_codegen__) /* * We can't emit jumps because they are 32 bits only so they can't be patched. 
* So we make indirect calls through GOT entries which are patched by the AOT @@ -719,39 +731,38 @@ arch_emit_plt_entry (MonoAotCompile *acfg, int index) emit_symbol_diff (acfg, acfg->got_symbol, ".", ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) -4); /* Used by mono_aot_get_plt_info_offset */ emit_int32 (acfg, acfg->plt_got_info_offsets [index]); +#elif defined(__native_client_codegen__) + guint8 buf [256]; + guint8 *buf_aligned = ALIGN_TO(buf, kNaClAlignment); + guint8 *code = buf_aligned; + + /* mov (%rip), %r11d */ + emit_byte (acfg, '\x45'); + emit_byte (acfg, '\x8b'); + emit_byte (acfg, '\x1d'); + emit_symbol_diff (acfg, acfg->got_symbol, ".", ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) -4); + + amd64_jump_reg (code, AMD64_R11); + /* This should be constant for the plt patch */ + g_assert ((size_t)(code-buf_aligned) == 10); + emit_bytes (acfg, buf_aligned, code - buf_aligned); + + /* Hide data in a push imm32 so it passes validation */ + emit_byte (acfg, 0x68); /* push */ + emit_int32 (acfg, acfg->plt_got_info_offsets [index]); + emit_alignment (acfg, AOT_FUNC_ALIGNMENT); +#endif /*__native_client_codegen__*/ #elif defined(TARGET_ARM) guint8 buf [256]; guint8 *code; - /* FIXME: - * - optimize OP_AOTCONST implementation - * - optimize the PLT entries - * - optimize SWITCH AOT implementation - */ code = buf; - if (acfg->use_bin_writer && FALSE) { - /* FIXME: mono_arch_patch_plt_entry () needs to decode this */ - /* We only emit 1 relocation since we implement it ourselves anyway */ - img_writer_emit_reloc (acfg->w, R_ARM_ALU_PC_G0_NC, acfg->got_symbol, ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) - 8); - /* FIXME: A 2 instruction encoding is sufficient in most cases */ - ARM_ADD_REG_IMM (code, ARMREG_IP, ARMREG_PC, 0, 0); - ARM_ADD_REG_IMM (code, ARMREG_IP, ARMREG_IP, 0, 0); - ARM_LDR_IMM (code, ARMREG_PC, ARMREG_IP, 0); - emit_bytes (acfg, buf, code - buf); - /* Used by mono_aot_get_plt_info_offset */ - emit_int32 (acfg, 
acfg->plt_got_info_offsets [index]); - } else { - ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0); - ARM_LDR_REG_REG (code, ARMREG_PC, ARMREG_PC, ARMREG_IP); - emit_bytes (acfg, buf, code - buf); - emit_symbol_diff (acfg, acfg->got_symbol, ".", ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) - 4); - /* Used by mono_aot_get_plt_info_offset */ - emit_int32 (acfg, acfg->plt_got_info_offsets [index]); - } - /* - * The plt_got_info_offset is computed automatically by - * mono_aot_get_plt_info_offset (), so no need to save it here. - */ + ARM_LDR_IMM (code, ARMREG_IP, ARMREG_PC, 0); + ARM_LDR_REG_REG (code, ARMREG_PC, ARMREG_PC, ARMREG_IP); + emit_bytes (acfg, buf, code - buf); + emit_symbol_diff (acfg, acfg->got_symbol, ".", ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) - 4); + /* Used by mono_aot_get_plt_info_offset */ + emit_int32 (acfg, acfg->plt_got_info_offsets [index]); #elif defined(TARGET_POWERPC) guint32 offset = (acfg->plt_got_offset_base + index) * sizeof (gpointer); @@ -774,6 +785,33 @@ arch_emit_plt_entry (MonoAotCompile *acfg, int index) #endif } +static void +arch_emit_llvm_plt_entry (MonoAotCompile *acfg, int index) +{ +#if defined(TARGET_ARM) +#if 0 + /* LLVM calls the PLT entries using bl, so emit a stub */ + /* FIXME: Too much overhead on every call */ + fprintf (acfg->fp, ".thumb_func\n"); + fprintf (acfg->fp, "bx pc\n"); + fprintf (acfg->fp, "nop\n"); + fprintf (acfg->fp, ".arm\n"); +#endif + /* LLVM calls the PLT entries using bl, so these have to be thumb2 */ + fprintf (acfg->fp, ".thumb_func\n"); + /* The code below should be 12 bytes long */ + fprintf (acfg->fp, "ldr ip, [pc, #8]\n"); + /* thumb can't encode ld pc, [pc, ip] */ + fprintf (acfg->fp, "add ip, pc, ip\n"); + fprintf (acfg->fp, "ldr ip, [ip, #0]\n"); + fprintf (acfg->fp, "bx ip\n"); + emit_symbol_diff (acfg, acfg->got_symbol, ".", ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) + 4); + emit_int32 (acfg, acfg->plt_got_info_offsets [index]); +#else + 
g_assert_not_reached (); +#endif +} + /* * arch_emit_specific_trampoline: * @@ -797,6 +835,7 @@ arch_emit_specific_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size * - all the trampolines should be of the same length. */ #if defined(TARGET_AMD64) +#if defined(__default_codegen__) /* This should be exactly 16 bytes long */ *tramp_size = 16; /* call *(%rip) */ @@ -805,8 +844,61 @@ arch_emit_specific_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size emit_byte (acfg, '\x15'); emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4); /* This should be relative to the start of the trampoline */ - emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4 + 19); + emit_symbol_diff (acfg, acfg->got_symbol, ".", ((offset+1) * sizeof (gpointer)) + 7); emit_zero_bytes (acfg, 5); +#elif defined(__native_client_codegen__) + guint8 buf [256]; + guint8 *buf_aligned = ALIGN_TO(buf, kNaClAlignment); + guint8 *code = buf_aligned; + guint8 *call_start; + size_t call_len; + int got_offset; + + /* Emit this call in 'code' so we can find out how long it is. */ + amd64_call_reg (code, AMD64_R11); + call_start = mono_arch_nacl_skip_nops (buf_aligned); + call_len = code - call_start; + + /* The tramp_size is twice the NaCl alignment because it starts with */ + /* a call which needs to be aligned to the end of the boundary. */ + *tramp_size = kNaClAlignment*2; + { + /* Emit nops to align call site below which is 7 bytes plus */ + /* the length of the call sequence emitted above. */ + /* Note: this requires the specific trampoline starts on a */ + /* kNaclAlignedment aligned address, which it does because */ + /* it's its own function that is aligned. 
*/ + guint8 nop_buf[256]; + guint8 *nopbuf_aligned = ALIGN_TO (nop_buf, kNaClAlignment); + guint8 *nopbuf_end = mono_arch_nacl_pad (nopbuf_aligned, kNaClAlignment - 7 - (call_len)); + emit_bytes (acfg, nopbuf_aligned, nopbuf_end - nopbuf_aligned); + } + /* The trampoline is stored at the offset'th pointer, the -4 is */ + /* present because RIP relative addressing starts at the end of */ + /* the current instruction, while the label "." is relative to */ + /* the beginning of the current asm location, which in this case */ + /* is not the mov instruction, but the offset itself, due to the */ + /* way the bytes and ints are emitted here. */ + got_offset = (offset * sizeof(gpointer)) - 4; + + /* mov (%rip), %r11d */ + emit_byte (acfg, '\x45'); + emit_byte (acfg, '\x8b'); + emit_byte (acfg, '\x1d'); + emit_symbol_diff (acfg, acfg->got_symbol, ".", got_offset); + + /* naclcall %r11 */ + emit_bytes (acfg, call_start, call_len); + + /* The arg is stored at the offset+1 pointer, relative to beginning */ + /* of trampoline: 7 for mov, plus the call length, and 1 for push. 
*/ + got_offset = ((offset + 1) * sizeof(gpointer)) + 7 + call_len + 1; + + /* We can't emit this data directly, hide in a "push imm32" */ + emit_byte (acfg, '\x68'); /* push */ + emit_symbol_diff (acfg, acfg->got_symbol, ".", got_offset); + emit_alignment (acfg, kNaClAlignment); +#endif /*__native_client_codegen__*/ #elif defined(TARGET_ARM) guint8 buf [128]; guint8 *code; @@ -993,6 +1085,7 @@ static void arch_emit_static_rgctx_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size) { #if defined(TARGET_AMD64) +#if defined(__default_codegen__) /* This should be exactly 13 bytes long */ *tramp_size = 13; @@ -1006,6 +1099,31 @@ arch_emit_static_rgctx_trampoline (MonoAotCompile *acfg, int offset, int *tramp_ emit_byte (acfg, '\xff'); emit_byte (acfg, '\x25'); emit_symbol_diff (acfg, acfg->got_symbol, ".", ((offset + 1) * sizeof (gpointer)) - 4); +#elif defined(__native_client_codegen__) + guint8 buf [128]; + guint8 *buf_aligned = ALIGN_TO(buf, kNaClAlignment); + guint8 *code = buf_aligned; + + /* mov (%rip), %r10d */ + emit_byte (acfg, '\x45'); + emit_byte (acfg, '\x8b'); + emit_byte (acfg, '\x15'); + emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4); + + /* mov (%rip), %r11d */ + emit_byte (acfg, '\x45'); + emit_byte (acfg, '\x8b'); + emit_byte (acfg, '\x1d'); + emit_symbol_diff (acfg, acfg->got_symbol, ".", ((offset + 1) * sizeof (gpointer)) - 4); + + /* nacljmp *%r11 */ + amd64_jump_reg (code, AMD64_R11); + emit_bytes (acfg, buf_aligned, code - buf_aligned); + + emit_alignment (acfg, kNaClAlignment); + *tramp_size = kNaClAlignment; +#endif /*__native_client_codegen__*/ + #elif defined(TARGET_ARM) guint8 buf [128]; guint8 *code; @@ -1115,50 +1233,74 @@ arch_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size) { #if defined(TARGET_AMD64) guint8 *buf, *code; +#if defined(__native_client_codegen__) + guint8 *buf_alloc; +#endif guint8 *labels [3]; + guint8 mov_buf[3]; + guint8 *mov_buf_ptr = mov_buf; + const int 
kSizeOfMove = 7; +#if defined(__default_codegen__) code = buf = g_malloc (256); +#elif defined(__native_client_codegen__) + buf_alloc = g_malloc (256 + kNaClAlignment + kSizeOfMove); + buf = ((guint)buf_alloc + kNaClAlignment) & ~kNaClAlignmentMask; + /* The RIP relative move below is emitted first */ + buf += kSizeOfMove; + code = buf; +#endif /* FIXME: Optimize this, i.e. use binary search etc. */ /* Maybe move the body into a separate function (slower, but much smaller) */ - /* R11 is a free register */ + /* MONO_ARCH_IMT_SCRATCH_REG is a free register */ labels [0] = code; - amd64_alu_membase_imm (code, X86_CMP, AMD64_R11, 0, 0); + amd64_alu_membase_imm (code, X86_CMP, MONO_ARCH_IMT_SCRATCH_REG, 0, 0); labels [1] = code; - amd64_branch8 (code, X86_CC_Z, FALSE, 0); + amd64_branch8 (code, X86_CC_Z, 0, FALSE); /* Check key */ - amd64_alu_membase_reg (code, X86_CMP, AMD64_R11, 0, MONO_ARCH_IMT_REG); + amd64_alu_membase_reg_size (code, X86_CMP, MONO_ARCH_IMT_SCRATCH_REG, 0, MONO_ARCH_IMT_REG, sizeof (gpointer)); labels [2] = code; - amd64_branch8 (code, X86_CC_Z, FALSE, 0); + amd64_branch8 (code, X86_CC_Z, 0, FALSE); /* Loop footer */ - amd64_alu_reg_imm (code, X86_ADD, AMD64_R11, 2 * sizeof (gpointer)); + amd64_alu_reg_imm (code, X86_ADD, MONO_ARCH_IMT_SCRATCH_REG, 2 * sizeof (gpointer)); amd64_jump_code (code, labels [0]); /* Match */ mono_amd64_patch (labels [2], code); - amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, sizeof (gpointer), 8); - amd64_jump_membase (code, AMD64_R11, 0); + amd64_mov_reg_membase (code, MONO_ARCH_IMT_SCRATCH_REG, MONO_ARCH_IMT_SCRATCH_REG, sizeof (gpointer), sizeof (gpointer)); + amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0); /* No match */ /* FIXME: */ mono_amd64_patch (labels [1], code); x86_breakpoint (code); - amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 12345678, 8); - - /* mov (%rip), %r11 */ - emit_byte (acfg, '\x4d'); - emit_byte (acfg, '\x8b'); - emit_byte (acfg, '\x1d'); + /* mov (%rip), 
MONO_ARCH_IMT_SCRATCH_REG */ + amd64_emit_rex (mov_buf_ptr, sizeof(gpointer), MONO_ARCH_IMT_SCRATCH_REG, 0, AMD64_RIP); + *(mov_buf_ptr)++ = (unsigned char)0x8b; /* mov opcode */ + x86_address_byte (mov_buf_ptr, 0, MONO_ARCH_IMT_SCRATCH_REG & 0x7, 5); + emit_bytes (acfg, mov_buf, mov_buf_ptr - mov_buf); emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4); emit_bytes (acfg, buf, code - buf); - *tramp_size = code - buf + 7; + *tramp_size = code - buf + kSizeOfMove; +#if defined(__native_client_codegen__) + /* The tramp will be padded to the next kNaClAlignment bundle. */ + *tramp_size = ALIGN_TO ((*tramp_size), kNaClAlignment); +#endif + +#if defined(__default_codegen__) + g_free (buf); +#elif defined(__native_client_codegen__) + g_free (buf_alloc); +#endif + #elif defined(TARGET_X86) guint8 *buf, *code; #ifdef __native_client_codegen__ @@ -1166,11 +1308,11 @@ arch_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size) #endif guint8 *labels [3]; -#ifdef __native_client_codegen__ +#if defined(__default_codegen__) + code = buf = g_malloc (256); +#elif defined(__native_client_codegen__) buf_alloc = g_malloc (256 + kNaClAlignment); code = buf = ((guint)buf_alloc + kNaClAlignment) & ~kNaClAlignmentMask; -#else - code = buf = g_malloc (256); #endif /* Allocate a temporary stack slot */ @@ -1223,6 +1365,13 @@ arch_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size) emit_bytes (acfg, buf, code - buf); *tramp_size = code - buf; + +#if defined(__default_codegen__) + g_free (buf); +#elif defined(__native_client_codegen__) + g_free (buf_alloc); +#endif + #elif defined(TARGET_ARM) guint8 buf [128]; guint8 *code, *code2, *labels [16]; @@ -1503,6 +1652,24 @@ add_to_blob (MonoAotCompile *acfg, const guint8 *data, guint32 data_len) return add_stream_data (&acfg->blob, (char*)data, data_len); } +static guint32 +add_to_blob_aligned (MonoAotCompile *acfg, const guint8 *data, guint32 data_len, guint32 align) +{ + char buf [4] = {0}; 
+ guint32 count; + + if (acfg->blob.alloc_size == 0) + stream_init (&acfg->blob); + + count = acfg->blob.index % align; + + /* we assume the stream data will be aligned */ + if (count) + add_stream_data (&acfg->blob, buf, 4 - count); + + return add_stream_data (&acfg->blob, (char*)data, data_len); +} + /* * emit_offset_table: * @@ -1822,12 +1989,19 @@ encode_method_ref (MonoAotCompile *acfg, MonoMethod *method, guint8 *buf, guint8 break; } case MONO_WRAPPER_UNKNOWN: - if (strcmp (method->name, "FastMonitorEnter") == 0) + if (strcmp (method->name, "FastMonitorEnter") == 0) { encode_value (MONO_AOT_WRAPPER_MONO_ENTER, p, &p); - else if (strcmp (method->name, "FastMonitorExit") == 0) + } else if (strcmp (method->name, "FastMonitorExit") == 0) { encode_value (MONO_AOT_WRAPPER_MONO_EXIT, p, &p); - else + } else if (strcmp (method->name, "PtrToStructure") == 0) { + encode_value (MONO_AOT_WRAPPER_PTR_TO_STRUCTURE, p, &p); + encode_klass_ref (acfg, method->klass, p, &p); + } else if (strcmp (method->name, "StructureToPtr") == 0) { + encode_value (MONO_AOT_WRAPPER_STRUCTURE_TO_PTR, p, &p); + encode_klass_ref (acfg, method->klass, p, &p); + } else { g_assert_not_reached (); + } break; case MONO_WRAPPER_SYNCHRONIZED: case MONO_WRAPPER_MANAGED_TO_NATIVE: @@ -2053,6 +2227,7 @@ get_plt_entry (MonoAotCompile *acfg, MonoJumpInfo *patch_info) res->plt_offset = acfg->plt_offset; res->ji = new_ji; res->symbol = get_plt_symbol (acfg, res->plt_offset, patch_info); + res->llvm_symbol = g_strdup_printf ("%s_llvm", res->symbol); g_hash_table_insert (acfg->patch_to_plt_entry, new_ji, res); @@ -3096,6 +3271,7 @@ emit_and_reloc_code (MonoAotCompile *acfg, MonoMethod *method, guint8 *code, gui /* Nullify the patch */ patch_info->type = MONO_PATCH_INFO_NONE; + plt_entry->jit_used = TRUE; } } @@ -3559,14 +3735,14 @@ emit_exception_debug_info (MonoAotCompile *acfg, MonoCompile *cfg) seq_points = cfg->seq_point_info; - buf_size = header->num_clauses * 256 + debug_info_size + 1024 + (seq_points ? 
(seq_points->len * 64) : 0); + buf_size = header->num_clauses * 256 + debug_info_size + 1024 + (seq_points ? (seq_points->len * 64) : 0) + cfg->gc_map_size; p = buf = g_malloc (buf_size); #ifdef MONO_ARCH_HAVE_XP_UNWIND use_unwind_ops = cfg->unwind_ops != NULL; #endif - flags = (jinfo->has_generic_jit_info ? 1 : 0) | (use_unwind_ops ? 2 : 0) | (header->num_clauses ? 4 : 0) | (seq_points ? 8 : 0) | (cfg->compile_llvm ? 16 : 0) | (jinfo->has_try_block_holes ? 32 : 0); + flags = (jinfo->has_generic_jit_info ? 1 : 0) | (use_unwind_ops ? 2 : 0) | (header->num_clauses ? 4 : 0) | (seq_points ? 8 : 0) | (cfg->compile_llvm ? 16 : 0) | (jinfo->has_try_block_holes ? 32 : 0) | (cfg->gc_map ? 64 : 0); encode_value (flags, p, &p); @@ -3700,7 +3876,6 @@ emit_exception_debug_info (MonoAotCompile *acfg, MonoCompile *cfg) } } - g_assert (debug_info_size < buf_size); encode_value (debug_info_size, p, &p); @@ -3710,12 +3885,23 @@ emit_exception_debug_info (MonoAotCompile *acfg, MonoCompile *cfg) g_free (debug_info); } + /* GC Map */ + if (cfg->gc_map) { + encode_value (cfg->gc_map_size, p, &p); + /* The GC map requires 4 bytes of alignment */ + while ((gsize)p % 4) + p ++; + memcpy (p, cfg->gc_map, cfg->gc_map_size); + p += cfg->gc_map_size; + } + acfg->stats.ex_info_size += p - buf; g_assert (p - buf < buf_size); /* Emit info */ - cfg->ex_info_offset = add_to_blob (acfg, buf, p - buf); + /* The GC Map requires 4 byte alignment */ + cfg->ex_info_offset = add_to_blob_aligned (acfg, buf, p - buf, cfg->gc_map ? 
4 : 1); g_free (buf); } @@ -3805,14 +3991,49 @@ emit_klass_info (MonoAotCompile *acfg, guint32 token) return res; } +static char* +get_plt_entry_debug_sym (MonoAotCompile *acfg, MonoJumpInfo *ji, GHashTable *cache) +{ + char *debug_sym; + + switch (ji->type) { + case MONO_PATCH_INFO_METHOD: + debug_sym = get_debug_sym (ji->data.method, "plt_", cache); + break; + case MONO_PATCH_INFO_INTERNAL_METHOD: + debug_sym = g_strdup_printf ("plt__jit_icall_%s", ji->data.name); + break; + case MONO_PATCH_INFO_CLASS_INIT: + debug_sym = g_strdup_printf ("plt__class_init_%s", mono_type_get_name (&ji->data.klass->byval_arg)); + sanitize_symbol (debug_sym); + break; + case MONO_PATCH_INFO_RGCTX_FETCH: + debug_sym = g_strdup_printf ("plt__rgctx_fetch_%d", acfg->label_generator ++); + break; + case MONO_PATCH_INFO_ICALL_ADDR: { + char *s = get_debug_sym (ji->data.method, "", cache); + + debug_sym = g_strdup_printf ("plt__icall_native_%s", s); + g_free (s); + break; + } + case MONO_PATCH_INFO_JIT_ICALL_ADDR: + debug_sym = g_strdup_printf ("plt__jit_icall_native_%s", ji->data.name); + break; + case MONO_PATCH_INFO_GENERIC_CLASS_INIT: + debug_sym = g_strdup_printf ("plt__generic_class_init"); + break; + default: + break; + } + + return debug_sym; +} + /* * Calls made from AOTed code are routed through a table of jumps similar to the - * ELF PLT (Program Linkage Table). The differences are the following: - * - the ELF PLT entries make an indirect jump though the GOT so they expect the - * GOT pointer to be in EBX. We want to avoid this, so our table contains direct - * jumps. This means the jumps need to be patched when the address of the callee is - * known. Initially the PLT entries jump to code which transfers control to the - * AOT runtime through the first PLT entry. + * ELF PLT (Program Linkage Table). Initially the PLT entries jump to code which transfers + * control to the AOT runtime through a trampoline. 
*/ static void emit_plt (MonoAotCompile *acfg) @@ -3827,28 +4048,23 @@ emit_plt (MonoAotCompile *acfg) sprintf (symbol, "plt"); emit_section_change (acfg, ".text", 0); - emit_global (acfg, symbol, TRUE); - emit_alignment (acfg, 16); + emit_alignment (acfg, NACL_SIZE(16, kNaClAlignment)); emit_label (acfg, symbol); emit_label (acfg, acfg->plt_symbol); for (i = 0; i < acfg->plt_offset; ++i) { - char label [128]; char *debug_sym = NULL; MonoPltEntry *plt_entry = NULL; MonoJumpInfo *ji; - if (i == 0) { + if (i == 0) /* - * The first plt entry is used to transfer code to the AOT loader. + * The first plt entry is unused. */ - arch_emit_plt_entry (acfg, i); continue; - } plt_entry = g_hash_table_lookup (acfg->plt_offset_to_entry, GUINT_TO_POINTER (i)); ji = plt_entry->ji; - sprintf (label, "%s", plt_entry->symbol); if (acfg->llvm) { /* @@ -3860,63 +4076,93 @@ emit_plt (MonoAotCompile *acfg) */ if (ji && is_direct_callable (acfg, NULL, ji) && !acfg->use_bin_writer) { MonoCompile *callee_cfg = g_hash_table_lookup (acfg->method_to_cfg, ji->data.method); - fprintf (acfg->fp, "\n.set %s, %s\n", label, callee_cfg->asm_symbol); + + if (acfg->thumb_mixed && !callee_cfg->compile_llvm) { + /* LLVM calls the PLT entries using bl, so emit a stub */ + emit_label (acfg, plt_entry->llvm_symbol); + fprintf (acfg->fp, ".thumb_func\n"); + fprintf (acfg->fp, "bx pc\n"); + fprintf (acfg->fp, "nop\n"); + fprintf (acfg->fp, ".arm\n"); + fprintf (acfg->fp, "b %s\n", callee_cfg->asm_symbol); + } else { + fprintf (acfg->fp, "\n.set %s, %s\n", plt_entry->llvm_symbol, callee_cfg->asm_symbol); + } continue; } } - emit_label (acfg, label); + if (acfg->aot_opts.write_symbols) + plt_entry->debug_sym = get_plt_entry_debug_sym (acfg, ji, cache); + debug_sym = plt_entry->debug_sym; - if (acfg->aot_opts.write_symbols) { - switch (ji->type) { - case MONO_PATCH_INFO_METHOD: - debug_sym = get_debug_sym (ji->data.method, "plt_", cache); - break; - case MONO_PATCH_INFO_INTERNAL_METHOD: - debug_sym = 
g_strdup_printf ("plt__jit_icall_%s", ji->data.name); - break; - case MONO_PATCH_INFO_CLASS_INIT: - debug_sym = g_strdup_printf ("plt__class_init_%s", mono_type_get_name (&ji->data.klass->byval_arg)); - sanitize_symbol (debug_sym); - break; - case MONO_PATCH_INFO_RGCTX_FETCH: - debug_sym = g_strdup_printf ("plt__rgctx_fetch_%d", acfg->label_generator ++); - break; - case MONO_PATCH_INFO_ICALL_ADDR: { - char *s = get_debug_sym (ji->data.method, "", cache); - - debug_sym = g_strdup_printf ("plt__icall_native_%s", s); - g_free (s); - break; - } - case MONO_PATCH_INFO_JIT_ICALL_ADDR: - debug_sym = g_strdup_printf ("plt__jit_icall_native_%s", ji->data.name); - break; - case MONO_PATCH_INFO_GENERIC_CLASS_INIT: - debug_sym = g_strdup_printf ("plt__generic_class_init"); - break; - default: - break; + if (acfg->thumb_mixed && !plt_entry->jit_used) + /* Emit only a thumb version */ + continue; + + if (!acfg->thumb_mixed) + emit_label (acfg, plt_entry->llvm_symbol); + + if (debug_sym) { + emit_local_symbol (acfg, debug_sym, NULL, TRUE); + emit_label (acfg, debug_sym); + } + + emit_label (acfg, plt_entry->symbol); + + arch_emit_plt_entry (acfg, i); + + if (debug_sym) + emit_symbol_size (acfg, debug_sym, "."); + } + + if (acfg->thumb_mixed) { + /* + * Emit a separate set of PLT entries using thumb2 which is called by LLVM generated + * code. 
+ */ + for (i = 0; i < acfg->plt_offset; ++i) { + char *debug_sym = NULL; + MonoPltEntry *plt_entry = NULL; + MonoJumpInfo *ji; + + if (i == 0) + continue; + + plt_entry = g_hash_table_lookup (acfg->plt_offset_to_entry, GUINT_TO_POINTER (i)); + ji = plt_entry->ji; + + if (ji && is_direct_callable (acfg, NULL, ji) && !acfg->use_bin_writer) + continue; + + /* Skip plt entries not actually called by LLVM code */ + if (!plt_entry->llvm_used) + continue; + + if (acfg->aot_opts.write_symbols) { + if (plt_entry->debug_sym) + debug_sym = g_strdup_printf ("%s_thumb", plt_entry->debug_sym); } if (debug_sym) { emit_local_symbol (acfg, debug_sym, NULL, TRUE); emit_label (acfg, debug_sym); } - } - arch_emit_plt_entry (acfg, i); + emit_label (acfg, plt_entry->llvm_symbol); - if (debug_sym) { - emit_symbol_size (acfg, debug_sym, "."); - g_free (debug_sym); + arch_emit_llvm_plt_entry (acfg, i); + + if (debug_sym) { + emit_symbol_size (acfg, debug_sym, "."); + g_free (debug_sym); + } } } emit_symbol_size (acfg, acfg->plt_symbol, "."); sprintf (symbol, "plt_end"); - emit_global (acfg, symbol, TRUE); emit_label (acfg, symbol); g_hash_table_destroy (cache); @@ -4151,7 +4397,6 @@ emit_trampolines (MonoAotCompile *acfg) g_assert_not_reached (); } - emit_global (acfg, symbol, TRUE); emit_alignment (acfg, AOT_FUNC_ALIGNMENT); emit_label (acfg, symbol); @@ -4769,8 +5014,9 @@ mono_aot_get_plt_symbol (MonoJumpInfoType type, gconstpointer data) return NULL; plt_entry = get_plt_entry (llvm_acfg, ji); + plt_entry->llvm_used = TRUE; - return g_strdup_printf (plt_entry->symbol); + return g_strdup_printf (plt_entry->llvm_symbol); } MonoJumpInfo* @@ -4864,14 +5110,14 @@ emit_llvm_file (MonoAotCompile *acfg) if (!acfg->llc_args) acfg->llc_args = g_string_new (""); -#if !LLVM_CHECK_VERSION(2, 8) - /* LLVM 2.8 removed the -f flag ??? 
*/ - g_string_append (acfg->llc_args, " -f"); -#endif + /* Verbose asm slows down llc greatly */ + g_string_append (acfg->llc_args, " -asm-verbose=false"); if (acfg->aot_opts.mtriple) g_string_append_printf (acfg->llc_args, " -mtriple=%s", acfg->aot_opts.mtriple); + unlink (acfg->tmpfname); + command = g_strdup_printf ("llc %s -relocation-model=pic -unwind-tables -disable-gnu-eh-frame -enable-mono-eh-frame -o %s temp.opt.bc", acfg->llc_args->str, acfg->tmpfname); printf ("Executing llc: %s\n", command); @@ -4905,7 +5151,6 @@ emit_code (MonoAotCompile *acfg) */ sprintf (symbol, "methods"); emit_section_change (acfg, ".text", 0); - emit_global (acfg, symbol, TRUE); emit_alignment (acfg, 8); if (acfg->llvm) { for (i = 0; i < acfg->nmethods; ++i) { @@ -4925,7 +5170,17 @@ emit_code (MonoAotCompile *acfg) * Emit some padding so the local symbol for the first method doesn't have the * same address as 'methods'. */ +#if defined(__default_codegen__) emit_zero_bytes (acfg, 16); +#elif defined(__native_client_codegen__) + { + const int kPaddingSize = 16; + guint8 pad_buffer[kPaddingSize]; + mono_arch_nacl_pad (pad_buffer, kPaddingSize); + emit_bytes (acfg, pad_buffer, kPaddingSize); + } +#endif + for (l = acfg->method_order; l != NULL; l = l->next) { MonoCompile *cfg; @@ -4971,13 +5226,11 @@ emit_code (MonoAotCompile *acfg) sprintf (symbol, "methods_end"); emit_section_change (acfg, ".text", 0); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); sprintf (symbol, "code_offsets"); emit_section_change (acfg, RODATA_SECT, 1); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); @@ -5017,7 +5270,6 @@ emit_info (MonoAotCompile *acfg) sprintf (symbol, "method_info_offsets"); emit_section_change (acfg, RODATA_SECT, 1); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); @@ -5280,25 +5532,39 @@ emit_extra_methods (MonoAotCompile *acfg) name = NULL; if 
(method->wrapper_type) { + gboolean encode_ref = FALSE; + /* * We encode some wrappers using their name, since encoding them - * directly would be difficult. This also avoids creating the wrapper - * methods at runtime, since they are not needed anyway. + * directly would be difficult. This works because at runtime, we only need to + * check whenever a method ref matches an existing MonoMethod. The downside is + * that the method names are large, so we use the binary encoding if possible. */ switch (method->wrapper_type) { case MONO_WRAPPER_REMOTING_INVOKE_WITH_CHECK: case MONO_WRAPPER_SYNCHRONIZED: - /* encode_method_ref () can handle these */ + encode_ref = TRUE; + break; + case MONO_WRAPPER_MANAGED_TO_NATIVE: + /* Skip JIT icall wrappers */ + if (!strstr (method->name, "__icall_wrapper")) + encode_ref = TRUE; break; + case MONO_WRAPPER_UNKNOWN: + if (!strcmp (method->name, "PtrToStructure") || !strcmp (method->name, "StructureToPtr")) + encode_ref = TRUE; + break; case MONO_WRAPPER_RUNTIME_INVOKE: if (mono_marshal_method_from_wrapper (method) != method && !strstr (method->name, "virtual")) /* Direct wrapper, encode normally */ - break; - /* Fall through */ + encode_ref = TRUE; + break; default: - name = mono_aot_wrapper_name (method); break; } + + if (!encode_ref) + name = mono_aot_wrapper_name (method); } if (name) { @@ -5367,7 +5633,6 @@ emit_extra_methods (MonoAotCompile *acfg) /* Emit the table */ sprintf (symbol, "extra_method_table"); emit_section_change (acfg, RODATA_SECT, 0); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); @@ -5396,7 +5661,6 @@ emit_extra_methods (MonoAotCompile *acfg) */ sprintf (symbol, "extra_method_info_offsets"); emit_section_change (acfg, RODATA_SECT, 0); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); @@ -5428,7 +5692,6 @@ emit_exception_info (MonoAotCompile *acfg) sprintf (symbol, "ex_info_offsets"); emit_section_change (acfg, RODATA_SECT, 1); - 
emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); @@ -5452,7 +5715,6 @@ emit_unwind_info (MonoAotCompile *acfg) emit_section_change (acfg, RODATA_SECT, 1); emit_alignment (acfg, 8); emit_label (acfg, symbol); - emit_global (acfg, symbol, FALSE); for (i = 0; i < acfg->unwind_ops->len; ++i) { guint32 index = GPOINTER_TO_UINT (g_ptr_array_index (acfg->unwind_ops, i)); @@ -5470,18 +5732,6 @@ emit_unwind_info (MonoAotCompile *acfg) acfg->stats.unwind_info_size += (p - buf) + unwind_info_len; } - - /* - * Emit a reference to the mono_eh_frame table created by our modified LLVM compiler. - */ - if (acfg->llvm) { - sprintf (symbol, "mono_eh_frame_addr"); - emit_section_change (acfg, ".data", 0); - emit_global (acfg, symbol, FALSE); - emit_alignment (acfg, 8); - emit_label (acfg, symbol); - emit_pointer (acfg, "mono_eh_frame"); - } } static void @@ -5497,7 +5747,6 @@ emit_class_info (MonoAotCompile *acfg) sprintf (symbol, "class_info_offsets"); emit_section_change (acfg, RODATA_SECT, 1); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); @@ -5558,7 +5807,6 @@ emit_class_name_table (MonoAotCompile *acfg) /* Emit the table */ sprintf (symbol, "class_name_table"); emit_section_change (acfg, RODATA_SECT, 0); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); @@ -5593,9 +5841,8 @@ emit_image_table (MonoAotCompile *acfg) * So we emit it at once, and reference its elements by an index. 
*/ - sprintf (symbol, "mono_image_table"); + sprintf (symbol, "image_table"); emit_section_change (acfg, RODATA_SECT, 1); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); @@ -5678,7 +5925,6 @@ emit_got_info (MonoAotCompile *acfg) /* Emit got_info_offsets table */ sprintf (symbol, "got_info_offsets"); emit_section_change (acfg, RODATA_SECT, 1); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); @@ -5704,13 +5950,6 @@ emit_got (MonoAotCompile *acfg) sprintf (symbol, "got_end"); emit_label (acfg, symbol); } - - sprintf (symbol, "mono_aot_got_addr"); - emit_section_change (acfg, ".data", 0); - emit_global (acfg, symbol, FALSE); - emit_alignment (acfg, 8); - emit_label (acfg, symbol); - emit_pointer (acfg, acfg->got_symbol); } typedef struct GlobalsTableEntry { @@ -5821,16 +6060,14 @@ emit_globals (MonoAotCompile *acfg) { char *build_info; - emit_string_symbol (acfg, "mono_assembly_guid" , acfg->image->guid); - - emit_string_symbol (acfg, "mono_aot_version", MONO_AOT_FILE_VERSION); + emit_local_string_symbol (acfg, "assembly_guid" , acfg->image->guid); if (acfg->aot_opts.bind_to_runtime_version) { build_info = mono_get_runtime_build_info (); - emit_string_symbol (acfg, "mono_runtime_version", build_info); + emit_local_string_symbol (acfg, "runtime_version", build_info); g_free (build_info); } else { - emit_string_symbol (acfg, "mono_runtime_version", ""); + emit_local_string_symbol (acfg, "runtime_version", ""); } /* @@ -5896,7 +6133,6 @@ emit_mem_end (MonoAotCompile *acfg) sprintf (symbol, "mem_end"); emit_section_change (acfg, ".text", 1); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, symbol); } @@ -5926,6 +6162,57 @@ emit_file_info (MonoAotCompile *acfg) emit_global (acfg, symbol, FALSE); /* The data emitted here must match MonoAotFileInfo. 
*/ + + emit_int32 (acfg, MONO_AOT_FILE_VERSION); + emit_int32 (acfg, 0); + + /* + * We emit pointers to our data structures instead of emitting global symbols which + * point to them, to reduce the number of globals, and because using globals leads to + * various problems (e.g. arm/thumb). + */ + emit_pointer (acfg, acfg->got_symbol); + emit_pointer (acfg, "methods"); + if (acfg->llvm) { + /* + * Emit a reference to the mono_eh_frame table created by our modified LLVM compiler. + */ + emit_pointer (acfg, "mono_eh_frame"); + } else { + emit_pointer (acfg, NULL); + } + emit_pointer (acfg, "blob"); + emit_pointer (acfg, "class_name_table"); + emit_pointer (acfg, "class_info_offsets"); + emit_pointer (acfg, "method_info_offsets"); + emit_pointer (acfg, "ex_info_offsets"); + emit_pointer (acfg, "code_offsets"); + emit_pointer (acfg, "extra_method_info_offsets"); + emit_pointer (acfg, "extra_method_table"); + emit_pointer (acfg, "got_info_offsets"); + emit_pointer (acfg, "methods_end"); + emit_pointer (acfg, "unwind_info"); + emit_pointer (acfg, "mem_end"); + emit_pointer (acfg, "image_table"); + emit_pointer (acfg, "plt"); + emit_pointer (acfg, "plt_end"); + emit_pointer (acfg, "assembly_guid"); + emit_pointer (acfg, "runtime_version"); + if (acfg->num_trampoline_got_entries) { + emit_pointer (acfg, "specific_trampolines"); + emit_pointer (acfg, "static_rgctx_trampolines"); + emit_pointer (acfg, "imt_thunks"); + } else { + emit_pointer (acfg, NULL); + emit_pointer (acfg, NULL); + emit_pointer (acfg, NULL); + } + if (acfg->thumb_mixed) { + emit_pointer (acfg, "thumb_end"); + } else { + emit_pointer (acfg, NULL); + } + emit_int32 (acfg, acfg->plt_got_offset_base); emit_int32 (acfg, (int)(acfg->got_offset * sizeof (gpointer))); emit_int32 (acfg, acfg->plt_offset); @@ -5949,7 +6236,6 @@ emit_blob (MonoAotCompile *acfg) sprintf (symbol, "blob"); emit_section_change (acfg, RODATA_SECT, 1); - emit_global (acfg, symbol, FALSE); emit_alignment (acfg, 8); emit_label (acfg, 
symbol); @@ -6118,7 +6404,11 @@ compile_asm (MonoAotCompile *acfg) #endif #ifdef __native_client_codegen__ +#if defined(TARGET_AMD64) +#define AS_NAME "nacl64-as" +#else #define AS_NAME "nacl-as" +#endif #else #define AS_NAME "as" #endif @@ -6509,6 +6799,20 @@ mono_compile_assembly (MonoAssembly *ass, guint32 opts, const char *aot_options) if (acfg->dwarf) mono_dwarf_writer_emit_base_info (acfg->dwarf, mono_unwind_get_cie_program ()); + if (acfg->thumb_mixed) { + char symbol [256]; + /* + * This global symbol marks the end of THUMB code, and the beginning of ARM + * code generated by our JIT. + */ + sprintf (symbol, "thumb_end"); + emit_section_change (acfg, ".text", 0); + emit_label (acfg, symbol); + fprintf (acfg->fp, ".skip 16\n"); + + fprintf (acfg->fp, ".arm\n"); + } + emit_code (acfg); emit_info (acfg);