This commit was manufactured by cvs2svn to create branch 'mono-1-0'.
[mono.git] / mono / mini / mini-x86.c
index b8be3a7166e1e94c548c73973511ce655ce17d9d..14710b31792c017b6ad1af633ac4c805b86692bc 100644 (file)
@@ -33,8 +33,6 @@ static gint lmf_tls_offset = -1;
 
 #define SIGNAL_STACK_SIZE (64 * 1024)
 
-static gpointer mono_arch_get_lmf_addr (void);
-
 const char*
 mono_arch_regname (int reg) {
        switch (reg) {
@@ -702,7 +700,11 @@ if (ins->flags & MONO_INST_BRLABEL) { \
                x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
         } else { \
                mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
-               x86_branch32 (code, cond, 0, sign); \
+               if ((cfg->opt & MONO_OPT_BRANCH) && \
+                    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
+                       x86_branch8 (code, cond, 0, sign); \
+                else \
+                       x86_branch32 (code, cond, 0, sign); \
         } \
 } else { \
         if (ins->inst_true_bb->native_offset) { \
@@ -1313,9 +1315,49 @@ alloc_int_reg (MonoCompile *cfg, InstList *curinst, MonoInst *ins, int sym_reg,
 #endif
 
 /* flags used in reginfo->flags */
-#define MONO_X86_FP_NEEDS_LOAD_SPILL   1
-#define MONO_X86_FP_NEEDS_SPILL                        2
-#define MONO_X86_FP_NEEDS_LOAD                 4
+enum {
+       MONO_X86_FP_NEEDS_LOAD_SPILL    = 1 << 0,
+       MONO_X86_FP_NEEDS_SPILL                 = 1 << 1,
+       MONO_X86_FP_NEEDS_LOAD                  = 1 << 2,
+       MONO_X86_REG_NOT_ECX                    = 1 << 3, /* hint: avoid ECX; set on sreg1 of clob 's' instructions */
+       MONO_X86_REG_EAX                                = 1 << 4, /* hint: prefer EAX; set on reg N when reg N + 1 gets MONO_X86_REG_EDX (presumably the low word of a long pair) */
+       MONO_X86_REG_EDX                                = 1 << 5, /* hint: prefer EDX; set on reg N + 1 of such a pair */
+       MONO_X86_REG_ECX                                = 1 << 6 /* hint: prefer ECX; set on sreg2 of clob 's' instructions */
+};
+
+/*
+ * mono_x86_alloc_int_reg:
+ *
+ *   Allocate an integer hard register for symbolic register @sym_reg,
+ * honouring the MONO_X86_REG_* placement hints in @flags when possible.
+ *
+ * @cfg: compile unit; registers are taken from cfg->rs
+ * @tmp: passed through to get_register_spilling () when a spill is needed
+ * @ins: passed through to get_register_spilling () when a spill is needed
+ * @dest_mask: bitmask of hard registers the caller accepts
+ * @sym_reg: symbolic register being assigned
+ * @flags: hint bits; EAX, EDX, ECX and NOT_ECX are tested in that order and
+ *         only the first one that is set narrows the initial mask
+ *
+ * The hints are preferences, not constraints. Attempts are made in this
+ * order: the hinted mask, @dest_mask without ECX (only when NOT_ECX is set),
+ * the full @dest_mask, and finally get_register_spilling ().
+ *
+ * Returns: the first non-negative register obtained from those attempts,
+ * otherwise whatever get_register_spilling () returns.
+ */
+static int
+mono_x86_alloc_int_reg (MonoCompile *cfg, InstList *tmp, MonoInst *ins, guint32 dest_mask, int sym_reg, int flags)
+{
+       int val;
+       /* same type as dest_mask so the comparison and the %u below are exact */
+       guint32 test_mask = dest_mask;
+
+       /* narrow the candidate set according to the first matching hint */
+       if (flags & MONO_X86_REG_EAX)
+               test_mask &= (1 << X86_EAX);
+       else if (flags & MONO_X86_REG_EDX)
+               test_mask &= (1 << X86_EDX);
+       else if (flags & MONO_X86_REG_ECX)
+               test_mask &= (1 << X86_ECX);
+       else if (flags & MONO_X86_REG_NOT_ECX)
+               test_mask &= ~ (1 << X86_ECX);
+
+       val = mono_regstate_alloc_int (cfg->rs, test_mask);
+       if (val >= 0 && test_mask != dest_mask) {
+               DEBUG(g_print ("\tUsed flag to allocate reg %s for R%u\n", mono_arch_regname (val), sym_reg));
+       }
+
+       if (val < 0 && (flags & MONO_X86_REG_NOT_ECX)) {
+               DEBUG(g_print ("\tFailed to allocate flag suggested mask (%u) but exluding ECX\n", test_mask));
+               /*
+                * Clear only the ECX bit. This used to read (~1 << X86_ECX),
+                * which is (~1) << X86_ECX: a left shift of a negative value
+                * that does not produce the "everything but ECX" mask.
+                */
+               val = mono_regstate_alloc_int (cfg->rs, dest_mask & ~(1 << X86_ECX));
+       }
+
+       /* hints could not be satisfied: take anything acceptable, else spill */
+       if (val < 0) {
+               val = mono_regstate_alloc_int (cfg->rs, dest_mask);
+               if (val < 0)
+                       val = get_register_spilling (cfg, tmp, ins, dest_mask, sym_reg);
+       }
+
+       return val;
+}
+
 
 /*#include "cprop.c"*/
 
@@ -1385,6 +1427,9 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                reginfo1 [ins->sreg1 + 1].last_use = i;
                                if (reginfo1 [ins->sreg1 + 1].born_in == 0 || reginfo1 [ins->sreg1 + 1].born_in > i)
                                        reginfo1 [ins->sreg1 + 1].born_in = i;
+
+                               reginfo1 [ins->sreg1].flags |= MONO_X86_REG_EAX;
+                               reginfo1 [ins->sreg1 + 1].flags |= MONO_X86_REG_EDX;
                        }
                } else {
                        ins->sreg1 = -1;
@@ -1416,6 +1461,10 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                if (reginfo2 [ins->sreg2 + 1].born_in == 0 || reginfo2 [ins->sreg2 + 1].born_in > i)
                                        reginfo2 [ins->sreg2 + 1].born_in = i;
                        }
+                       if (spec [MONO_INST_CLOB] == 's') {
+                               reginfo2 [ins->sreg1].flags |= MONO_X86_REG_NOT_ECX;
+                               reginfo2 [ins->sreg2].flags |= MONO_X86_REG_ECX;
+                       }
                } else {
                        ins->sreg2 = -1;
                }
@@ -1444,10 +1493,14 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                reginfod [ins->dreg + 1].last_use = i;
                                if (reginfod [ins->dreg + 1].born_in == 0 || reginfod [ins->dreg + 1].born_in > i)
                                        reginfod [ins->dreg + 1].born_in = i;
-                       } 
+
+                               reginfod [ins->dreg].flags |= MONO_X86_REG_EAX;
+                               reginfod [ins->dreg + 1].flags |= MONO_X86_REG_EDX;
+                       }
                } else {
                        ins->dreg = -1;
                }
+
                reversed = inst_list_prepend (cfg->mempool, reversed, ins);
                ++i;
                ins = ins->next;
@@ -1492,9 +1545,8 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                 * copy from this to ECX.
                                 */
                                if (val == X86_ECX && ins->dreg != ins->sreg2) {
-                                       int new_dest = mono_regstate_alloc_int (rs, dest_mask);
-                                       if (new_dest < 0)
-                                               new_dest = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
+                                       int new_dest;
+                                       new_dest = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
                                        g_assert (new_dest >= 0);
                                        DEBUG (g_print ("\tclob:s changing dreg R%d to %s from ECX\n", ins->dreg, mono_arch_regname (new_dest)));
 
@@ -1542,11 +1594,6 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                rs->isymbolic [X86_ECX] = ins->sreg2;
                                ins->sreg2 = X86_ECX;
                                rs->ifree_mask &= ~ (1 << X86_ECX);
-
-                               if (clob_dreg != -1 && reg_is_freeable (ins->dreg) && clob_dreg >= 0 && reginfo [clob_dreg].born_in >= i) {
-                                       DEBUG (g_print ("\tfreeable moved reg %s (R%d) (born in %d)\n", mono_arch_regname (ins->dreg), clob_dreg, reginfo [clob_dreg].born_in));
-                                       mono_regstate_free_int (rs, ins->dreg);
-                               }
                        }
                } else if (spec [MONO_INST_CLOB] == 'd') { /* division */
                        int dest_reg = X86_EAX;
@@ -1607,12 +1654,23 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                        src2_mask = 1 << X86_ECX;
                }
                if (spec [MONO_INST_DEST] == 'l') {
-                       if (!(rs->ifree_mask & (1 << X86_EAX))) {
+                       int hreg;
+                       val = rs->iassign [ins->dreg];
+                       /* check special case when dreg have been moved from ecx (clob shift) */
+                       if (spec [MONO_INST_CLOB] == 's' && clob_dreg != -1)
+                               hreg = clob_dreg + 1;
+                       else
+                               hreg = ins->dreg + 1;
+
+                       /* base prev_dreg on fixed hreg, handle clob case */
+                       val = hreg - 1;
+
+                       if (val != rs->isymbolic [X86_EAX] && !(rs->ifree_mask & (1 << X86_EAX))) {
                                DEBUG (g_print ("\t(long-low) forced spill of R%d\n", rs->isymbolic [X86_EAX]));
                                get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EAX]);
                                mono_regstate_free_int (rs, X86_EAX);
                        }
-                       if (!(rs->ifree_mask & (1 << X86_EDX))) {
+                       if (hreg != rs->isymbolic [X86_EDX] && !(rs->ifree_mask & (1 << X86_EDX))) {
                                DEBUG (g_print ("\t(long-high) forced spill of R%d\n", rs->isymbolic [X86_EDX]));
                                get_register_force_spilling (cfg, tmp, ins, rs->isymbolic [X86_EDX]);
                                mono_regstate_free_int (rs, X86_EDX);
@@ -1650,16 +1708,14 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        /* the register gets spilled after this inst */
                                        spill = -val -1;
                                }
-                               val = mono_regstate_alloc_int (rs, dest_mask);
-                               if (val < 0) /* todo: should we force reg into eax, for opt reasons? */
-                                       val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
+                               val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
                                rs->iassign [ins->dreg] = val;
                                if (spill)
                                        create_spilled_store (cfg, spill, val, prev_dreg, ins);
                        }
 
                        DEBUG (g_print ("\tassigned dreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg - 1));
-
                        rs->isymbolic [val] = hreg - 1;
                        ins->dreg = val;
                        
@@ -1670,21 +1726,20 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        /* the register gets spilled after this inst */
                                        spill = -val -1;
                                }
-                               val = mono_regstate_alloc_int (rs, dest_mask);
-                               if (val < 0) /* todo: should we force reg into edx, for opt reasons? */
-                                       val = get_register_spilling (cfg, tmp, ins, dest_mask, hreg);
+                               val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
                                rs->iassign [hreg] = val;
                                if (spill)
                                        create_spilled_store (cfg, spill, val, hreg, ins);
                        }
 
-                       DEBUG (g_print ("\tassigned hreg (long) %s to dest R%d\n", mono_arch_regname (val), hreg));
+                       DEBUG (g_print ("\tassigned hreg (long-high) %s to dest R%d\n", mono_arch_regname (val), hreg));
                        rs->isymbolic [val] = hreg;
                        /* save reg allocating into unused */
                        ins->unused = val;
 
+                       /* check if we can free our long reg */
                        if (reg_is_freeable (val) && hreg >= 0 && reginfo [hreg].born_in >= i) {
-                               DEBUG (g_print ("\tfreeable %s (R%d)\n", mono_arch_regname (val), hreg));
+                               DEBUG (g_print ("\tfreeable %s (R%d) (born in %d)\n", mono_arch_regname (val), hreg, reginfo [hreg].born_in));
                                mono_regstate_free_int (rs, val);
                        }
                }
@@ -1709,9 +1764,7 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        /* the register gets spilled after this inst */
                                        spill = -val -1;
                                }
-                               val = mono_regstate_alloc_int (rs, dest_mask);
-                               if (val < 0)
-                                       val = get_register_spilling (cfg, tmp, ins, dest_mask, ins->dreg);
+                               val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, ins->dreg, reginfo [ins->dreg].flags);
                                rs->iassign [ins->dreg] = val;
                                if (spill)
                                        create_spilled_store (cfg, spill, val, prev_dreg, ins);
@@ -1730,9 +1783,7 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                                /* the register gets spilled after this inst */
                                                spill = -val -1;
                                        }
-                                       val = mono_regstate_alloc_int (rs, dest_mask);
-                                       if (val < 0)
-                                               val = get_register_spilling (cfg, tmp, ins, dest_mask, hreg);
+                                       val = mono_x86_alloc_int_reg (cfg, tmp, ins, dest_mask, hreg, reginfo [hreg].flags);
                                        rs->iassign [hreg] = val;
                                        if (spill)
                                                create_spilled_store (cfg, spill, val, hreg, ins);
@@ -1820,8 +1871,10 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                        DEBUG (g_print ("\tassigned sreg1 (long-high) %s to sreg1 R%d\n", mono_arch_regname (ins->unused), ins->sreg1 + 1));
 
                        ins->sreg1 = ins->dreg;
-                       /* no need for this, we know that src1=dest in this cases */
-                       /*ins->inst_c0 = ins->unused;*/
+                       /* 
+                        * No need for saving the reg, we know that src1=dest in this cases
+                        * ins->inst_c0 = ins->unused;
+                        */
 
                        /* make sure that we remove them from free mask */
                        rs->ifree_mask &= ~ (1 << ins->dreg);
@@ -1851,9 +1904,7 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        DEBUG (g_print ("\tfast assigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
                                } else {
                                        //g_assert (val == -1); /* source cannot be spilled */
-                                       val = mono_regstate_alloc_int (rs, src1_mask);
-                                       if (val < 0)
-                                               val = get_register_spilling (cfg, tmp, ins, src1_mask, ins->sreg1);
+                                       val = mono_x86_alloc_int_reg (cfg, tmp, ins, src1_mask, ins->sreg1, reginfo [ins->sreg1].flags);
                                        rs->iassign [ins->sreg1] = val;
                                        DEBUG (g_print ("\tassigned sreg1 %s to R%d\n", mono_arch_regname (val), ins->sreg1));
                                }
@@ -1917,9 +1968,7 @@ mono_arch_local_regalloc (MonoCompile *cfg, MonoBasicBlock *bb)
                                        /* the register gets spilled after this inst */
                                        spill = -val -1;
                                }
-                               val = mono_regstate_alloc_int (rs, src2_mask);
-                               if (val < 0)
-                                       val = get_register_spilling (cfg, tmp, ins, src2_mask, ins->sreg2);
+                               val = mono_x86_alloc_int_reg (cfg, tmp, ins, src2_mask, ins->sreg2, reginfo [ins->sreg2].flags);
                                rs->iassign [ins->sreg2] = val;
                                DEBUG (g_print ("\tassigned sreg2 %s to R%d\n", mono_arch_regname (val), ins->sreg2));
                                if (spill)
@@ -2090,6 +2139,10 @@ x86_pop_reg (code, X86_ECX); \
 x86_pop_reg (code, X86_EDX); \
 x86_pop_reg (code, X86_EAX);
 
+/*
+ * LOOP_ALIGNMENT: byte boundary that the code of a loop-start basic block
+ * is padded to; the value should be benchmarked and set based on cpu.
+ * bb_is_loop_start: non-zero when bb->nesting is set and bb->in_count == 1.
+ */
+#define LOOP_ALIGNMENT 8
+#define bb_is_loop_start(bb) ((bb)->nesting && ((bb)->in_count == 1))
+
 void
 mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 {
@@ -2105,9 +2158,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                peephole_pass (cfg, bb);
 
        if (cfg->opt & MONO_OPT_LOOP) {
-               int pad, align = 8;
+               int pad, align = LOOP_ALIGNMENT;
                /* set alignment depending on cpu */
-               if (bb->nesting && (bb->in_count == 1) && (pad = (cfg->code_len & (align - 1)))) {
+               if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
                        pad = align - pad;
                        /*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
                        x86_padding (code, pad);
@@ -2664,7 +2717,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                                        x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
                                } else {
                                        mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
-                                       x86_jump32 (code, 0);
+                                       if ((cfg->opt & MONO_OPT_BRANCH) &&
+                                           x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
+                                               x86_jump8 (code, 0);
+                                       else 
+                                               x86_jump32 (code, 0);
                                }
                        } else {
                                if (ins->inst_target_bb->native_offset) {
@@ -2895,9 +2952,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        break;          
                case OP_SIN:
                        x86_fsin (code);
+                       x86_fldz (code);
+                       x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        break;          
                case OP_COS:
                        x86_fcos (code);
+                       x86_fldz (code);
+                       x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        break;          
                case OP_ABS:
                        x86_fabs (code);
@@ -2932,12 +2993,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
                        x86_fstp (code, 0); /* pop the 1.0 */
                        x86_patch (check_pos, code);
                        x86_patch (end_tan, code);
+                       x86_fldz (code);
+                       x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        x86_pop_reg (code, X86_EAX);
                        break;
                }
                case OP_ATAN:
                        x86_fld1 (code);
                        x86_fpatan (code);
+                       x86_fldz (code);
+                       x86_fp_op_reg (code, X86_FADD, 1, TRUE);
                        break;          
                case OP_SQRT:
                        x86_fsqrt (code);
@@ -3252,7 +3317,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 void
 mono_arch_register_lowlevel_calls (void)
 {
-       mono_register_jit_icall (mono_arch_get_lmf_addr, "mono_arch_get_lmf_addr", NULL, TRUE);
 }
 
 void
@@ -3368,10 +3432,10 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                x86_push_imm (code, 0);
 
                /* save all caller saved regs */
-               x86_push_reg (code, X86_EBX);
-               x86_push_reg (code, X86_EDI);
-               x86_push_reg (code, X86_ESI);
                x86_push_reg (code, X86_EBP);
+               x86_push_reg (code, X86_ESI);
+               x86_push_reg (code, X86_EDI);
+               x86_push_reg (code, X86_EBX);
 
                /* save method info */
                x86_push_imm (code, method);
@@ -3388,13 +3452,8 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        x86_mov_reg_membase (code, X86_EAX, X86_EAX, lmf_tls_offset, 4);
                }
                else {
-#ifdef HAVE_KW_THREAD
-                       mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
-                                                                (gpointer)"mono_arch_get_lmf_addr");
-#else
                        mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_INTERNAL_METHOD, 
                                                                 (gpointer)"mono_get_lmf_addr");
-#endif
                        x86_call_code (code, 0);
                }
 
@@ -3448,9 +3507,15 @@ mono_arch_emit_prolog (MonoCompile *cfg)
                        bb->max_offset = max_offset;
 
                        if (cfg->prof_options & MONO_PROFILE_COVERAGE)
-                               max_offset += 6; 
+                               max_offset += 6;
+                       /* max alignment for loops */
+                       if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
+                               max_offset += LOOP_ALIGNMENT;
 
                        while (ins) {
+                               if (ins->opcode == OP_LABEL)
+                                       ins->inst_c1 = max_offset;
+                               
                                max_offset += ((guint8 *)ins_spec [ins->opcode])[MONO_INST_LEN];
                                ins = ins->next;
                        }
@@ -3498,7 +3563,42 @@ mono_arch_emit_epilog (MonoCompile *cfg)
        pos = 0;
        
        if (method->save_lmf) {
-               pos = -sizeof (MonoLMF);
+               gint32 prev_lmf_reg;
+
+               /* Find a spare register */
+               switch (sig->ret->type) {
+               case MONO_TYPE_I8:
+               case MONO_TYPE_U8:
+                       prev_lmf_reg = X86_EDI;
+                       cfg->used_int_regs |= (1 << X86_EDI);
+                       break;
+               default:
+                       prev_lmf_reg = X86_EDX;
+                       break;
+               }
+
+               /* reg = previous_lmf */
+               x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, -32, 4);
+
+               /* ecx = lmf */
+               x86_mov_reg_membase (code, X86_ECX, X86_EBP, -28, 4);
+
+               /* *(lmf) = previous_lmf */
+               x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
+
+               /* restore caller saved regs */
+               if (cfg->used_int_regs & (1 << X86_EBX)) {
+                       x86_mov_reg_membase (code, X86_EBX, X86_EBP, -20, 4);
+               }
+
+               if (cfg->used_int_regs & (1 << X86_EDI)) {
+                       x86_mov_reg_membase (code, X86_EDI, X86_EBP, -16, 4);
+               }
+               if (cfg->used_int_regs & (1 << X86_ESI)) {
+                       x86_mov_reg_membase (code, X86_ESI, X86_EBP, -12, 4);
+               }
+
+               /* EBP is restored by LEAVE */
        } else {
                if (cfg->used_int_regs & (1 << X86_EBX)) {
                        pos -= 4;
@@ -3509,29 +3609,9 @@ mono_arch_emit_epilog (MonoCompile *cfg)
                if (cfg->used_int_regs & (1 << X86_ESI)) {
                        pos -= 4;
                }
-       }
 
-       if (pos)
-               x86_lea_membase (code, X86_ESP, X86_EBP, pos);
-       
-       if (method->save_lmf) {
-               /* ebx = previous_lmf */
-               x86_pop_reg (code, X86_EBX);
-               /* edi = lmf */
-               x86_pop_reg (code, X86_EDI);
-               /* *(lmf) = previous_lmf */
-               x86_mov_membase_reg (code, X86_EDI, 0, X86_EBX, 4);
-
-               /* discard method info */
-               x86_pop_reg (code, X86_ESI);
-
-               /* restore caller saved regs */
-               x86_pop_reg (code, X86_EBP);
-               x86_pop_reg (code, X86_ESI);
-               x86_pop_reg (code, X86_EDI);
-               x86_pop_reg (code, X86_EBX);
-
-       } else {
+               if (pos)
+                       x86_lea_membase (code, X86_ESP, X86_EBP, pos);
 
                if (cfg->used_int_regs & (1 << X86_ESI)) {
                        x86_pop_reg (code, X86_ESI);
@@ -3604,21 +3684,6 @@ mono_arch_flush_register_windows (void)
 
 static gboolean tls_offset_inited = FALSE;
 
-#ifdef HAVE_KW_THREAD
-static __thread gpointer mono_lmf_addr;
-#endif
-
-static gpointer
-mono_arch_get_lmf_addr (void)
-{
-#ifdef HAVE_KW_THREAD
-       return mono_lmf_addr;
-#else
-       g_assert_not_reached ();
-       return NULL;
-#endif
-}
-
 void
 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
 {
@@ -3635,12 +3700,13 @@ mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
 
                tls_offset_inited = TRUE;
 
+               code = (guint8*)mono_get_lmf_addr;
+
                if (getenv ("MONO_NPTL")) {
                        /* 
                         * Determine the offset of mono_lfm_addr inside the TLS structures
                         * by disassembling the function above.
                         */
-                       code = (guint8*)&mono_arch_get_lmf_addr;
 
                        /* This is generated by gcc 3.3.2 */
                        if ((code [0] == 0x55) && (code [1] == 0x89) && (code [2] == 0xe5) &&
@@ -3649,6 +3715,12 @@ mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
                                (code [9] == 0x8b) && (code [10] == 0x80)) {
                                lmf_tls_offset = *(int*)&(code [11]);
                        }
+                       else
+                               /* This is generated by gcc-3.4 */
+                               if ((code [0] == 0x55) && (code [1] == 0x89) && (code [2] == 0xe5) &&
+                                       (code [3] == 0x65) && (code [4] == 0xa1)) {
+                                       lmf_tls_offset = *(int*)&(code [5]);
+                               }
                }
        }               
 
@@ -3661,11 +3733,16 @@ mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
 #else
 #ifdef HAVE_PTHREAD_ATTR_GET_NP
                pthread_attr_get_np( self, &attr );
+#elif defined(sun)
+               pthread_attr_init( &attr );
+               pthread_attr_getstacksize( &attr, &stsize );
 #else
 #error "Not implemented"
 #endif
 #endif
+#ifndef sun
                pthread_attr_getstack( &attr, &staddr, &stsize );
+#endif
        }
 
        /* 
@@ -3683,10 +3760,6 @@ mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
        sa.ss_flags = SS_ONSTACK;
        sigaltstack (&sa, NULL);
 #endif
-
-#ifdef HAVE_KW_THREAD
-       mono_lmf_addr = &tls->lmf;
-#endif
 }
 
 void