Merge remote branch 'upstream/master'
[mono.git] / mono / mini / aot-compiler.c
index cd3528a93d71db8bc06fe8693ad69ea8cd32cb27..6f13e974c20ccf7feab18eea589ede91e123fbf4 100644 (file)
@@ -189,8 +189,9 @@ typedef struct MonoAotCompile {
 
 typedef struct {
        int plt_offset;
-       char *symbol, *llvm_symbol;
+       char *symbol, *llvm_symbol, *debug_sym;
        MonoJumpInfo *ji;
+       gboolean jit_used, llvm_used;
 } MonoPltEntry;
 
 #define mono_acfg_lock(acfg) EnterCriticalSection (&((acfg)->mutex))
@@ -483,7 +484,7 @@ encode_sleb128 (gint32 value, guint8 *buf, guint8 **endbuf)
 #else
 #define AOT_FUNC_ALIGNMENT 16
 #endif
-#if defined(TARGET_X86) && defined(__native_client_codegen__)
+#if (defined(TARGET_X86) || defined(TARGET_AMD64)) && defined(__native_client_codegen__)
 #undef AOT_FUNC_ALIGNMENT
 #define AOT_FUNC_ALIGNMENT 32
 #endif
@@ -697,8 +698,14 @@ arch_emit_plt_entry (MonoAotCompile *acfg, int index)
 {
 #if defined(TARGET_X86)
                guint32 offset = (acfg->plt_got_offset_base + index) * sizeof (gpointer);
-
-#ifdef __native_client_codegen__
+#if defined(__default_codegen__)
+               /* jmp *<offset>(%ebx) */
+               emit_byte (acfg, 0xff);
+               emit_byte (acfg, 0xa3);
+               emit_int32 (acfg, offset);
+               /* Used by mono_aot_get_plt_info_offset */
+               emit_int32 (acfg, acfg->plt_got_info_offsets [index]);
+#elif defined(__native_client_codegen__)
                const guint8 kSizeOfNaClJmp = 11;
                guint8 bytes[kSizeOfNaClJmp];
                guint8 *pbytes = &bytes[0];
@@ -710,15 +717,9 @@ arch_emit_plt_entry (MonoAotCompile *acfg, int index)
                emit_byte (acfg, 0x68);  /* hide data in a push */
                emit_int32 (acfg, acfg->plt_got_info_offsets [index]);
                emit_alignment (acfg, AOT_FUNC_ALIGNMENT);
-#else
-               /* jmp *<offset>(%ebx) */
-               emit_byte (acfg, 0xff);
-               emit_byte (acfg, 0xa3);
-               emit_int32 (acfg, offset);
-               /* Used by mono_aot_get_plt_info_offset */
-               emit_int32 (acfg, acfg->plt_got_info_offsets [index]);
-#endif  /* __native_client_codegen__ */
+#endif /*__native_client_codegen__*/
 #elif defined(TARGET_AMD64)
+#if defined(__default_codegen__)
                /*
                 * We can't emit jumps because they are 32 bits only so they can't be patched.
                 * So we make indirect calls through GOT entries which are patched by the AOT 
@@ -730,6 +731,27 @@ arch_emit_plt_entry (MonoAotCompile *acfg, int index)
                emit_symbol_diff (acfg, acfg->got_symbol, ".", ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) -4);
                /* Used by mono_aot_get_plt_info_offset */
                emit_int32 (acfg, acfg->plt_got_info_offsets [index]);
+#elif defined(__native_client_codegen__)
+               guint8 buf [256];
+               guint8 *buf_aligned = ALIGN_TO(buf, kNaClAlignment);
+               guint8 *code = buf_aligned;
+
+               /* mov <OFFSET>(%rip), %r11d */
+               emit_byte (acfg, '\x45');
+               emit_byte (acfg, '\x8b');
+               emit_byte (acfg, '\x1d');
+               emit_symbol_diff (acfg, acfg->got_symbol, ".", ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) -4);
+
+               amd64_jump_reg (code, AMD64_R11);
+               /* This should be constant for the plt patch */
+               g_assert ((size_t)(code-buf_aligned) == 10);
+               emit_bytes (acfg, buf_aligned, code - buf_aligned);
+
+               /* Hide data in a push imm32 so it passes validation */
+               emit_byte (acfg, 0x68);  /* push */
+               emit_int32 (acfg, acfg->plt_got_info_offsets [index]);
+               emit_alignment (acfg, AOT_FUNC_ALIGNMENT);
+#endif /*__native_client_codegen__*/
 #elif defined(TARGET_ARM)
                guint8 buf [256];
                guint8 *code;
@@ -763,6 +785,33 @@ arch_emit_plt_entry (MonoAotCompile *acfg, int index)
 #endif
 }
 
+static void
+arch_emit_llvm_plt_entry (MonoAotCompile *acfg, int index)
+{
+#if defined(TARGET_ARM)
+#if 0
+       /* LLVM calls the PLT entries using bl, so emit a stub */
+       /* FIXME: Too much overhead on every call */
+       fprintf (acfg->fp, ".thumb_func\n");
+       fprintf (acfg->fp, "bx pc\n");
+       fprintf (acfg->fp, "nop\n");
+       fprintf (acfg->fp, ".arm\n");
+#endif
+       /* LLVM calls the PLT entries using bl, so these have to be thumb2 */
+       fprintf (acfg->fp, ".thumb_func\n");
+       /* The code below should be 12 bytes long */
+       fprintf (acfg->fp, "ldr ip, [pc, #8]\n");
+       /* thumb can't encode ld pc, [pc, ip] */
+       fprintf (acfg->fp, "add ip, pc, ip\n");
+       fprintf (acfg->fp, "ldr ip, [ip, #0]\n");
+       fprintf (acfg->fp, "bx ip\n");
+       emit_symbol_diff (acfg, acfg->got_symbol, ".", ((acfg->plt_got_offset_base + index) * sizeof (gpointer)) + 4);
+       emit_int32 (acfg, acfg->plt_got_info_offsets [index]);
+#else
+       g_assert_not_reached ();
+#endif
+}
+
 /*
  * arch_emit_specific_trampoline:
  *
@@ -786,6 +835,7 @@ arch_emit_specific_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size
         * - all the trampolines should be of the same length.
         */
 #if defined(TARGET_AMD64)
+#if defined(__default_codegen__)
        /* This should be exactly 16 bytes long */
        *tramp_size = 16;
        /* call *<offset>(%rip) */
@@ -794,8 +844,61 @@ arch_emit_specific_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size
        emit_byte (acfg, '\x15');
        emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4);
        /* This should be relative to the start of the trampoline */
-       emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4 + 19);
+       emit_symbol_diff (acfg, acfg->got_symbol, ".", ((offset+1) * sizeof (gpointer)) + 7);
        emit_zero_bytes (acfg, 5);
+#elif defined(__native_client_codegen__)
+       guint8 buf [256];
+       guint8 *buf_aligned = ALIGN_TO(buf, kNaClAlignment);
+       guint8 *code = buf_aligned;
+       guint8 *call_start;
+       size_t call_len;
+       int got_offset;
+
+       /* Emit this call in 'code' so we can find out how long it is. */
+       amd64_call_reg (code, AMD64_R11);
+       call_start = mono_arch_nacl_skip_nops (buf_aligned);
+       call_len = code - call_start;
+
+       /* The tramp_size is twice the NaCl alignment because it starts with */ 
+       /* a call which needs to be aligned to the end of the boundary.      */
+       *tramp_size = kNaClAlignment*2;
+       {
+               /* Emit nops to align call site below which is 7 bytes plus */
+               /* the length of the call sequence emitted above.           */
+               /* Note: this requires that the specific trampoline start   */
+               /* on a kNaClAlignment-aligned address, which it does       */
+               /* because it is its own function that is aligned.          */
+               guint8 nop_buf[256];
+               guint8 *nopbuf_aligned = ALIGN_TO (nop_buf, kNaClAlignment);
+               guint8 *nopbuf_end = mono_arch_nacl_pad (nopbuf_aligned, kNaClAlignment - 7 - (call_len));
+               emit_bytes (acfg, nopbuf_aligned, nopbuf_end - nopbuf_aligned);
+       }
+       /* The trampoline is stored at the offset'th pointer, the -4 is  */
+       /* present because RIP relative addressing starts at the end of  */
+       /* the current instruction, while the label "." is relative to   */
+       /* the beginning of the current asm location, which in this case */
+       /* is not the mov instruction, but the offset itself, due to the */
+       /* way the bytes and ints are emitted here.                      */
+       got_offset = (offset * sizeof(gpointer)) - 4;
+
+       /* mov <OFFSET>(%rip), %r11d */
+       emit_byte (acfg, '\x45');
+       emit_byte (acfg, '\x8b');
+       emit_byte (acfg, '\x1d');
+       emit_symbol_diff (acfg, acfg->got_symbol, ".", got_offset);
+
+       /* naclcall %r11 */
+       emit_bytes (acfg, call_start, call_len);
+
+       /* The arg is stored at the offset+1 pointer, relative to beginning */
+       /* of trampoline: 7 for mov, plus the call length, and 1 for push.  */
+       got_offset = ((offset + 1) * sizeof(gpointer)) + 7 + call_len + 1;
+
+       /* We can't emit this data directly, hide in a "push imm32" */
+       emit_byte (acfg, '\x68'); /* push */
+       emit_symbol_diff (acfg, acfg->got_symbol, ".", got_offset);
+       emit_alignment (acfg, kNaClAlignment);
+#endif /*__native_client_codegen__*/
 #elif defined(TARGET_ARM)
        guint8 buf [128];
        guint8 *code;
@@ -982,6 +1085,7 @@ static void
 arch_emit_static_rgctx_trampoline (MonoAotCompile *acfg, int offset, int *tramp_size)
 {
 #if defined(TARGET_AMD64)
+#if defined(__default_codegen__)
        /* This should be exactly 13 bytes long */
        *tramp_size = 13;
 
@@ -995,6 +1099,31 @@ arch_emit_static_rgctx_trampoline (MonoAotCompile *acfg, int offset, int *tramp_
        emit_byte (acfg, '\xff');
        emit_byte (acfg, '\x25');
        emit_symbol_diff (acfg, acfg->got_symbol, ".", ((offset + 1) * sizeof (gpointer)) - 4);
+#elif defined(__native_client_codegen__)
+       guint8 buf [128];
+       guint8 *buf_aligned = ALIGN_TO(buf, kNaClAlignment);
+       guint8 *code = buf_aligned;
+
+       /* mov <OFFSET>(%rip), %r10d */
+       emit_byte (acfg, '\x45');
+       emit_byte (acfg, '\x8b');
+       emit_byte (acfg, '\x15');
+       emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4);
+
+       /* mov <OFFSET>(%rip), %r11d */
+       emit_byte (acfg, '\x45');
+       emit_byte (acfg, '\x8b');
+       emit_byte (acfg, '\x1d');
+       emit_symbol_diff (acfg, acfg->got_symbol, ".", ((offset + 1) * sizeof (gpointer)) - 4);
+
+       /* nacljmp *%r11 */
+       amd64_jump_reg (code, AMD64_R11);
+       emit_bytes (acfg, buf_aligned, code - buf_aligned);
+
+       emit_alignment (acfg, kNaClAlignment);
+       *tramp_size = kNaClAlignment;
+#endif /*__native_client_codegen__*/
+
 #elif defined(TARGET_ARM)
        guint8 buf [128];
        guint8 *code;
@@ -1104,50 +1233,74 @@ arch_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size)
 {
 #if defined(TARGET_AMD64)
        guint8 *buf, *code;
+#if defined(__native_client_codegen__)
+       guint8 *buf_alloc;
+#endif
        guint8 *labels [3];
+       guint8 mov_buf[3];
+       guint8 *mov_buf_ptr = mov_buf;
 
+       const int kSizeOfMove = 7;
+#if defined(__default_codegen__)
        code = buf = g_malloc (256);
+#elif defined(__native_client_codegen__)
+       buf_alloc = g_malloc (256 + kNaClAlignment + kSizeOfMove);
+       buf = ((guint)buf_alloc + kNaClAlignment) & ~kNaClAlignmentMask;
+       /* The RIP relative move below is emitted first */
+       buf += kSizeOfMove;
+       code = buf;
+#endif
 
        /* FIXME: Optimize this, i.e. use binary search etc. */
        /* Maybe move the body into a separate function (slower, but much smaller) */
 
-       /* R11 is a free register */
+       /* MONO_ARCH_IMT_SCRATCH_REG is a free register */
 
        labels [0] = code;
-       amd64_alu_membase_imm (code, X86_CMP, AMD64_R11, 0, 0);
+       amd64_alu_membase_imm (code, X86_CMP, MONO_ARCH_IMT_SCRATCH_REG, 0, 0);
        labels [1] = code;
-       amd64_branch8 (code, X86_CC_Z, FALSE, 0);
+       amd64_branch8 (code, X86_CC_Z, 0, FALSE);
 
        /* Check key */
-       amd64_alu_membase_reg (code, X86_CMP, AMD64_R11, 0, MONO_ARCH_IMT_REG);
+       amd64_alu_membase_reg_size (code, X86_CMP, MONO_ARCH_IMT_SCRATCH_REG, 0, MONO_ARCH_IMT_REG, sizeof (gpointer));
        labels [2] = code;
-       amd64_branch8 (code, X86_CC_Z, FALSE, 0);
+       amd64_branch8 (code, X86_CC_Z, 0, FALSE);
 
        /* Loop footer */
-       amd64_alu_reg_imm (code, X86_ADD, AMD64_R11, 2 * sizeof (gpointer));
+       amd64_alu_reg_imm (code, X86_ADD, MONO_ARCH_IMT_SCRATCH_REG, 2 * sizeof (gpointer));
        amd64_jump_code (code, labels [0]);
 
        /* Match */
        mono_amd64_patch (labels [2], code);
-       amd64_mov_reg_membase (code, AMD64_R11, AMD64_R11, sizeof (gpointer), 8);
-       amd64_jump_membase (code, AMD64_R11, 0);
+       amd64_mov_reg_membase (code, MONO_ARCH_IMT_SCRATCH_REG, MONO_ARCH_IMT_SCRATCH_REG, sizeof (gpointer), sizeof (gpointer));
+       amd64_jump_membase (code, MONO_ARCH_IMT_SCRATCH_REG, 0);
 
        /* No match */
        /* FIXME: */
        mono_amd64_patch (labels [1], code);
        x86_breakpoint (code);
 
-       amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 12345678, 8);
-
-       /* mov <OFFSET>(%rip), %r11 */
-       emit_byte (acfg, '\x4d');
-       emit_byte (acfg, '\x8b');
-       emit_byte (acfg, '\x1d');
+       /* mov <OFFSET>(%rip), MONO_ARCH_IMT_SCRATCH_REG */
+       amd64_emit_rex (mov_buf_ptr, sizeof(gpointer), MONO_ARCH_IMT_SCRATCH_REG, 0, AMD64_RIP);
+       *(mov_buf_ptr)++ = (unsigned char)0x8b; /* mov opcode */
+       x86_address_byte (mov_buf_ptr, 0, MONO_ARCH_IMT_SCRATCH_REG & 0x7, 5);
+       emit_bytes (acfg, mov_buf, mov_buf_ptr - mov_buf);
        emit_symbol_diff (acfg, acfg->got_symbol, ".", (offset * sizeof (gpointer)) - 4);
 
        emit_bytes (acfg, buf, code - buf);
        
-       *tramp_size = code - buf + 7;
+       *tramp_size = code - buf + kSizeOfMove;
+#if defined(__native_client_codegen__)
+       /* The tramp will be padded to the next kNaClAlignment bundle. */
+       *tramp_size = ALIGN_TO ((*tramp_size), kNaClAlignment);
+#endif
+
+#if defined(__default_codegen__)
+       g_free (buf);
+#elif defined(__native_client_codegen__)
+       g_free (buf_alloc); 
+#endif
+
 #elif defined(TARGET_X86)
        guint8 *buf, *code;
 #ifdef __native_client_codegen__
@@ -1155,11 +1308,11 @@ arch_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size)
 #endif
        guint8 *labels [3];
 
-#ifdef __native_client_codegen__
+#if defined(__default_codegen__)
+       code = buf = g_malloc (256);
+#elif defined(__native_client_codegen__)
        buf_alloc = g_malloc (256 + kNaClAlignment);
        code = buf = ((guint)buf_alloc + kNaClAlignment) & ~kNaClAlignmentMask;
-#else
-       code = buf = g_malloc (256);
 #endif
 
        /* Allocate a temporary stack slot */
@@ -1212,6 +1365,13 @@ arch_emit_imt_thunk (MonoAotCompile *acfg, int offset, int *tramp_size)
        emit_bytes (acfg, buf, code - buf);
        
        *tramp_size = code - buf;
+
+#if defined(__default_codegen__)
+       g_free (buf);
+#elif defined(__native_client_codegen__)
+       g_free (buf_alloc); 
+#endif
+
 #elif defined(TARGET_ARM)
        guint8 buf [128];
        guint8 *code, *code2, *labels [16];
@@ -3111,6 +3271,7 @@ emit_and_reloc_code (MonoAotCompile *acfg, MonoMethod *method, guint8 *code, gui
                
                                                /* Nullify the patch */
                                                patch_info->type = MONO_PATCH_INFO_NONE;
+                                               plt_entry->jit_used = TRUE;
                                        }
                                }
 
@@ -3728,7 +3889,7 @@ emit_exception_debug_info (MonoAotCompile *acfg, MonoCompile *cfg)
        if (cfg->gc_map) {
                encode_value (cfg->gc_map_size, p, &p);
                /* The GC map requires 4 bytes of alignment */
-               while ((guint64)p % 4)
+               while ((gsize)p % 4)
                        p ++;
                memcpy (p, cfg->gc_map, cfg->gc_map_size);
                p += cfg->gc_map_size;
@@ -3830,14 +3991,49 @@ emit_klass_info (MonoAotCompile *acfg, guint32 token)
        return res;
 }
 
+static char*
+get_plt_entry_debug_sym (MonoAotCompile *acfg, MonoJumpInfo *ji, GHashTable *cache)
+{
+       char *debug_sym;
+
+       switch (ji->type) {
+       case MONO_PATCH_INFO_METHOD:
+               debug_sym = get_debug_sym (ji->data.method, "plt_", cache);
+               break;
+       case MONO_PATCH_INFO_INTERNAL_METHOD:
+               debug_sym = g_strdup_printf ("plt__jit_icall_%s", ji->data.name);
+               break;
+       case MONO_PATCH_INFO_CLASS_INIT:
+               debug_sym = g_strdup_printf ("plt__class_init_%s", mono_type_get_name (&ji->data.klass->byval_arg));
+               sanitize_symbol (debug_sym);
+               break;
+       case MONO_PATCH_INFO_RGCTX_FETCH:
+               debug_sym = g_strdup_printf ("plt__rgctx_fetch_%d", acfg->label_generator ++);
+               break;
+       case MONO_PATCH_INFO_ICALL_ADDR: {
+               char *s = get_debug_sym (ji->data.method, "", cache);
+               
+               debug_sym = g_strdup_printf ("plt__icall_native_%s", s);
+               g_free (s);
+               break;
+       }
+       case MONO_PATCH_INFO_JIT_ICALL_ADDR:
+               debug_sym = g_strdup_printf ("plt__jit_icall_native_%s", ji->data.name);
+               break;
+       case MONO_PATCH_INFO_GENERIC_CLASS_INIT:
+               debug_sym = g_strdup_printf ("plt__generic_class_init");
+               break;
+       default:
+               break;
+       }
+
+       return debug_sym;
+}
+
 /*
  * Calls made from AOTed code are routed through a table of jumps similar to the
- * ELF PLT (Program Linkage Table). The differences are the following:
- * - the ELF PLT entries make an indirect jump though the GOT so they expect the
- *   GOT pointer to be in EBX. We want to avoid this, so our table contains direct
- *   jumps. This means the jumps need to be patched when the address of the callee is
- *   known. Initially the PLT entries jump to code which transfers control to the
- *   AOT runtime through the first PLT entry.
+ * ELF PLT (Program Linkage Table). Initially the PLT entries jump to code which transfers
+ * control to the AOT runtime through a trampoline.
  */
 static void
 emit_plt (MonoAotCompile *acfg)
@@ -3852,27 +4048,23 @@ emit_plt (MonoAotCompile *acfg)
        sprintf (symbol, "plt");
 
        emit_section_change (acfg, ".text", 0);
-       emit_alignment (acfg, 16);
+       emit_alignment (acfg, NACL_SIZE(16, kNaClAlignment));
        emit_label (acfg, symbol);
        emit_label (acfg, acfg->plt_symbol);
 
        for (i = 0; i < acfg->plt_offset; ++i) {
-               char label [128];
                char *debug_sym = NULL;
                MonoPltEntry *plt_entry = NULL;
                MonoJumpInfo *ji;
 
-               if (i == 0) {
+               if (i == 0)
                        /* 
-                        * The first plt entry is used to transfer code to the AOT loader. 
+                        * The first plt entry is unused.
                         */
-                       arch_emit_plt_entry (acfg, i);
                        continue;
-               }
 
                plt_entry = g_hash_table_lookup (acfg->plt_offset_to_entry, GUINT_TO_POINTER (i));
                ji = plt_entry->ji;
-               sprintf (label, "%s", plt_entry->symbol);
 
                if (acfg->llvm) {
                        /*
@@ -3885,9 +4077,8 @@ emit_plt (MonoAotCompile *acfg)
                        if (ji && is_direct_callable (acfg, NULL, ji) && !acfg->use_bin_writer) {
                                MonoCompile *callee_cfg = g_hash_table_lookup (acfg->method_to_cfg, ji->data.method);
 
-                               if (acfg->thumb_mixed) {
+                               if (acfg->thumb_mixed && !callee_cfg->compile_llvm) {
                                        /* LLVM calls the PLT entries using bl, so emit a stub */
-                                       /* FIXME: Too much overhead on every call */
                                        emit_label (acfg, plt_entry->llvm_symbol);
                                        fprintf (acfg->fp, ".thumb_func\n");
                                        fprintf (acfg->fp, "bx pc\n");
@@ -3901,62 +4092,71 @@ emit_plt (MonoAotCompile *acfg)
                        }
                }
 
-               emit_label (acfg, plt_entry->llvm_symbol);
+               if (acfg->aot_opts.write_symbols)
+                       plt_entry->debug_sym = get_plt_entry_debug_sym (acfg, ji, cache);
+               debug_sym = plt_entry->debug_sym;
+
+               if (acfg->thumb_mixed && !plt_entry->jit_used)
+                       /* Emit only a thumb version */
+                       continue;
 
-               if (acfg->thumb_mixed) {
-                       /* LLVM calls the PLT entries using bl, so emit a stub */
-                       /* FIXME: Too much overhead on every call */
-                       fprintf (acfg->fp, ".thumb_func\n");
-                       fprintf (acfg->fp, "bx pc\n");
-                       fprintf (acfg->fp, "nop\n");
-                       fprintf (acfg->fp, ".arm\n");
+               if (!acfg->thumb_mixed)
+                       emit_label (acfg, plt_entry->llvm_symbol);
+
+               if (debug_sym) {
+                       emit_local_symbol (acfg, debug_sym, NULL, TRUE);
+                       emit_label (acfg, debug_sym);
                }
 
-               emit_label (acfg, label);
+               emit_label (acfg, plt_entry->symbol);
 
-               if (acfg->aot_opts.write_symbols) {
-                       switch (ji->type) {
-                       case MONO_PATCH_INFO_METHOD:
-                               debug_sym = get_debug_sym (ji->data.method, "plt_", cache);
-                               break;
-                       case MONO_PATCH_INFO_INTERNAL_METHOD:
-                               debug_sym = g_strdup_printf ("plt__jit_icall_%s", ji->data.name);
-                               break;
-                       case MONO_PATCH_INFO_CLASS_INIT:
-                               debug_sym = g_strdup_printf ("plt__class_init_%s", mono_type_get_name (&ji->data.klass->byval_arg));
-                               sanitize_symbol (debug_sym);
-                               break;
-                       case MONO_PATCH_INFO_RGCTX_FETCH:
-                               debug_sym = g_strdup_printf ("plt__rgctx_fetch_%d", acfg->label_generator ++);
-                               break;
-                       case MONO_PATCH_INFO_ICALL_ADDR: {
-                               char *s = get_debug_sym (ji->data.method, "", cache);
-                                       
-                               debug_sym = g_strdup_printf ("plt__icall_native_%s", s);
-                               g_free (s);
-                               break;
-                       }
-                       case MONO_PATCH_INFO_JIT_ICALL_ADDR:
-                               debug_sym = g_strdup_printf ("plt__jit_icall_native_%s", ji->data.name);
-                               break;
-                       case MONO_PATCH_INFO_GENERIC_CLASS_INIT:
-                               debug_sym = g_strdup_printf ("plt__generic_class_init");
-                               break;
-                       default:
-                               break;
+               arch_emit_plt_entry (acfg, i);
+
+               if (debug_sym)
+                       emit_symbol_size (acfg, debug_sym, ".");
+       }
+
+       if (acfg->thumb_mixed) {
+               /* 
+                * Emit a separate set of PLT entries using thumb2 which is called by LLVM generated
+                * code.
+                */
+               for (i = 0; i < acfg->plt_offset; ++i) {
+                       char *debug_sym = NULL;
+                       MonoPltEntry *plt_entry = NULL;
+                       MonoJumpInfo *ji;
+
+                       if (i == 0)
+                               continue;
+
+                       plt_entry = g_hash_table_lookup (acfg->plt_offset_to_entry, GUINT_TO_POINTER (i));
+                       ji = plt_entry->ji;
+
+                       if (ji && is_direct_callable (acfg, NULL, ji) && !acfg->use_bin_writer)
+                               continue;
+
+                       /* Skip plt entries not actually called by LLVM code */
+                       if (!plt_entry->llvm_used)
+                               continue;
+
+                       if (acfg->aot_opts.write_symbols) {
+                               if (plt_entry->debug_sym)
+                                       debug_sym = g_strdup_printf ("%s_thumb", plt_entry->debug_sym);
                        }
 
                        if (debug_sym) {
                                emit_local_symbol (acfg, debug_sym, NULL, TRUE);
                                emit_label (acfg, debug_sym);
                        }
-               }
 
-               arch_emit_plt_entry (acfg, i);
+                       emit_label (acfg, plt_entry->llvm_symbol);
 
-               if (debug_sym) {
-                       emit_symbol_size (acfg, debug_sym, ".");
-                       g_free (debug_sym);
+                       arch_emit_llvm_plt_entry (acfg, i);
+
+                       if (debug_sym) {
+                               emit_symbol_size (acfg, debug_sym, ".");
+                               g_free (debug_sym);
+                       }
                }
        }
 
@@ -4814,6 +5014,7 @@ mono_aot_get_plt_symbol (MonoJumpInfoType type, gconstpointer data)
                return NULL;
 
        plt_entry = get_plt_entry (llvm_acfg, ji);
+       plt_entry->llvm_used = TRUE;
 
        return g_strdup_printf (plt_entry->llvm_symbol);
 }
@@ -4969,7 +5170,17 @@ emit_code (MonoAotCompile *acfg)
         * Emit some padding so the local symbol for the first method doesn't have the
         * same address as 'methods'.
         */
+#if defined(__default_codegen__)
        emit_zero_bytes (acfg, 16);
+#elif defined(__native_client_codegen__)
+       {
+               const int kPaddingSize = 16;
+               guint8 pad_buffer[kPaddingSize];
+               mono_arch_nacl_pad (pad_buffer, kPaddingSize);
+               emit_bytes (acfg, pad_buffer, kPaddingSize);
+       }
+#endif
+       
 
        for (l = acfg->method_order; l != NULL; l = l->next) {
                MonoCompile *cfg;
@@ -6193,7 +6404,11 @@ compile_asm (MonoAotCompile *acfg)
 #endif
 
 #ifdef __native_client_codegen__
+#if defined(TARGET_AMD64)
+#define AS_NAME "nacl64-as"
+#else
 #define AS_NAME "nacl-as"
+#endif
 #else
 #define AS_NAME "as"
 #endif
@@ -6592,7 +6807,6 @@ mono_compile_assembly (MonoAssembly *ass, guint32 opts, const char *aot_options)
                 */
                sprintf (symbol, "thumb_end");
                emit_section_change (acfg, ".text", 0);
-               emit_global (acfg, symbol, FALSE);
                emit_label (acfg, symbol);
                fprintf (acfg->fp, ".skip 16\n");