* Merged in twisti-branch.
[cacao.git] / src / vm / jit / powerpc / asmpart.S
index 8af4da15066fb657b3925106c3b5dd05763e91e6..6b4dd2d6d777910d417a7ba6275c4b4a00eeaf09 100644 (file)
@@ -1,6 +1,6 @@
 /* src/vm/jit/powerpc/asmpart.S - Java-C interface functions for PowerPC
                
-   Copyright (C) 1996-2005, 2006 R. Grafl, A. Krall, C. Kruegel,
+   Copyright (C) 1996-2005, 2006, 2007 R. Grafl, A. Krall, C. Kruegel,
    C. Oates, R. Obermaisser, M. Platter, M. Probst, S. Ring,
    E. Steiner, C. Thalinger, D. Thuernbeck, P. Tomsich, C. Ullrich,
    J. Wenninger, Institut f. Computersprachen - TU Wien
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA.
 
-   Contact: cacao@cacaojvm.org
-
-   Authors: Andreas Krall
-            Reinhard Grafl
-            Stefan Ring
-
-   Changes: Christian Thalinger
-            Edwin Steiner
-
-   $Id: asmpart.S 4749 2006-04-11 10:20:18Z twisti $
+   $Id: asmpart.S 7454 2007-03-05 15:40:48Z tbfg $
 
 */
 
        .globl asm_vm_call_method_double
 
        .globl asm_vm_call_method_exception_handler
+       .globl asm_vm_call_method_end
 
        .globl asm_call_jit_compiler
 
        .globl asm_handle_nat_exception
        .globl asm_handle_exception
 
-       .globl asm_wrapper_patcher
+       .globl asm_abstractmethoderror
 
+       .globl asm_patcher_wrapper
+
+#if defined(ENABLE_REPLACEMENT)
        .globl asm_replacement_out
        .globl asm_replacement_in
+#endif
 
        .globl asm_cacheflush
-       .globl asm_initialize_thread_stack
-       .globl asm_perform_threadswitch
-       .globl asm_switchstackandcall
+
+       .globl asm_compare_and_swap
+       .globl asm_memory_barrier
+
        .globl asm_criticalsections
        .globl asm_getclassvalues_atomic
 
        .long   0                         /* line number table size               */
        .long   0                         /* fltsave                              */
        .long   0                         /* intsave                              */
-       .long   0                         /* isleaf                               */
+       .long   0                         /* IsLeaf                               */
        .long   0                         /* IsSync                               */
        .long   0                         /* frame size                           */
-       .long   0                         /* method pointer (pointer to name)     */
+       .long   0                         /* codeinfo pointer                     */
 
 asm_vm_call_method:
 asm_vm_call_method_int:
@@ -115,8 +112,11 @@ asm_vm_call_method_long:
 asm_vm_call_method_float:
 asm_vm_call_method_double:
        mflr    r0
-       stw     r0,LA_LR_OFFSET(r1)
-       stwu    r1,-40*4(r1)
+       stw     r0,LA_LR_OFFSET(sp)
+       stwu    sp,-40*4(sp)              /* keep stack 16-byte aligned           */
+
+       stw     s0,8*4(sp)                /* save used callee saved registers     */
+       stw     a0,9*4(sp)                /* save method pointer for compiler     */
 
 #if defined(__DARWIN__)
        stw     itmp1,10*4(sp)            /* register r11 is callee saved         */
@@ -128,33 +128,40 @@ asm_vm_call_method_double:
        stfd    ftmp2,16*4(sp)
 
 #if defined(__DARWIN__)
-       stw     t1,18*4(r1)
-       stw     t2,19*4(r1)
-       stw     t3,20*4(r1)
-       stw     t4,21*4(r1)
-       stw     t5,22*4(r1)
-       stw     t6,23*4(r1)
-       stw     t7,24*4(r1)
-
-       stfd    ft0,26*4(r1)
-       stfd    ft1,28*4(r1)
-       stfd    ft2,30*4(r1)
-       stfd    ft3,32*4(r1)
-       stfd    ft4,34*4(r1)
-       stfd    ft5,36*4(r1)
+       stw     t1,18*4(sp)
+       stw     t2,19*4(sp)
+       stw     t3,20*4(sp)
+       stw     t4,21*4(sp)
+       stw     t5,22*4(sp)
+       stw     t6,23*4(sp)
+       stw     t7,24*4(sp)
+
+       stfd    ft0,26*4(sp)
+       stfd    ft1,28*4(sp)
+       stfd    ft2,30*4(sp)
+       stfd    ft3,32*4(sp)
+       stfd    ft4,34*4(sp)
+       stfd    ft5,36*4(sp)
 #else
        SAVE_TEMPORARY_REGISTERS(18)      /* the offset has to be even            */
 #endif
 
-       stw     a0,9*4(r1)                /* save method pointer for compiler     */
+       mr      itmp2,a1                  /* arg count                            */
+       mr      itmp1,a2                  /* pointer to arg block                 */
+
+       mr      t4,itmp2                  /* save argument count                  */
+       mr      t5,itmp1                  /* save argument block pointer          */
 
-       mr      itmp1,r5                  /* pointer to arg block                 */
-       mr      itmp2,r4                  /* arg count                            */
+       mr      s0,sp                     /* save current sp to s0                */
 
        addi    itmp1,itmp1,-sizevmarg    /* initialize pointer (smaller code)    */
        addi    itmp2,itmp2,1             /* initialize argument count            */
        li      t0,0                      /* initialize integer argument counter  */
        li      t1,0                      /* initialize float argument counter    */
+       li      t6,0                      /* initialize integer register counter  */
+#if defined(__DARWIN__)
+       li      t7,0                      /* initialize stack slot counter        */
+#endif
 
        mflr    r0                        /* save link register (PIC code)        */
        bl      L_asm_vm_call_method_get_pc
@@ -168,21 +175,17 @@ L_register_copy:
        mr.     itmp2,itmp2
        beq     L_register_copy_done
 
-#if WORDS_BIGENDIAN == 1
        lwz     itmp3,offvmargtype+4(itmp1)
-#else
-#error XXX
-#endif
        andi.   r0,itmp3,0x0002           /* is this a float/double type?         */
        bne     L_register_handle_float
-
-       cmpwi   t0,INT_ARG_CNT            /* are we out of integer argument       */
+       
+L_register_handle_int:
+       cmpwi   t6,INT_ARG_CNT            /* are we out of integer argument       */
        beq     L_register_copy           /* registers? yes, next loop            */
 
-       andi.   r0,itmp3,0x0001           /* is this a long type?                 */
+       andi.   r0,itmp3,0x0001           /* is this a 2-word type?               */
        bne     L_register_handle_long
 
-L_register_handle_int:
 #if defined(__DARWIN__)
        addis   itmp3,t3,ha16(L_jumptable_int - L_asm_vm_call_method_get_pc)
        la      itmp3,lo16(L_jumptable_int - L_asm_vm_call_method_get_pc)(itmp3)
@@ -190,11 +193,16 @@ L_register_handle_int:
        lis     itmp3,L_jumptable_int@ha
        addi    itmp3,itmp3,L_jumptable_int@l
 #endif
-       slwi    t2,t0,2                   /* multiple of 4-bytes                  */
+
+       slwi    t2,t6,2                   /* multiple of 4-bytes                  */
        add     itmp3,itmp3,t2            /* calculate address of jumptable       */
        lwz     itmp3,0(itmp3)            /* load function address                */
-       addi    t0,t0,1                   /* integer argument counter + 1         */
        mtctr   itmp3
+       addi    t0,t0,1                   /* integer argument counter             */
+       addi    t6,t6,1                   /* integer argument register counter    */
+#if defined(__DARWIN__)
+       addi    t7,t7,1                   /* stack slot counter                   */
+#endif
        bctr
 
 L_register_handle_long:
@@ -205,21 +213,166 @@ L_register_handle_long:
        lis     itmp3,L_jumptable_long@ha
        addi    itmp3,itmp3,L_jumptable_long@l
 #endif
-       addi    t2,t0,1                   /* align to even numbers                */
-       srwi    t2,t2,1
-       slwi    t2,t2,1
-       slwi    t2,t2,2                   /* multiple of 4-bytes                  */
+#if !defined(__DARWIN__)
+       addi    t6,t6,1                   /* align to even numbers                */
+       andi.   t6,t6,0xfffe
+#endif
+
+       cmpwi   t6,(INT_ARG_CNT - 1)      /* are we out of integer argument       */
+       blt     L_register_handle_long_continue /* registers?                     */
+
+       li      t6,INT_ARG_CNT            /* yes, set integer argument register   */
+       b       L_register_copy           /* count to max and next loop           */
+
+L_register_handle_long_continue:
+       slwi    t2,t6,2                   /* multiple of 4-bytes                  */
        add     itmp3,itmp3,t2            /* calculate address of jumptable       */
        lwz     itmp3,0(itmp3)            /* load function address                */
-       addi    t0,t0,1                   /* integer argument counter + 1         */
        mtctr   itmp3
+       addi    t0,t0,1                   /* integer argument counter             */
+       addi    t6,t6,2                   /* integer argument register counter    */
+#if defined(__DARWIN__)
+       addi    t7,t7,2                   /* stack slot counter                   */
+#endif
        bctr
 
 L_register_handle_float:
+       cmpwi   t1,FLT_ARG_CNT            /* are we out of float argument         */
+       beq     L_register_copy           /* registers? yes, next loop            */
+
+       andi.   r0,itmp3,0x0001           /* is this a 2-word type?               */
+       bne     L_register_handle_double
+
+#if defined(__DARWIN__)
+       addis   itmp3,t3,ha16(L_jumptable_float - L_asm_vm_call_method_get_pc)
+       la      itmp3,lo16(L_jumptable_float - L_asm_vm_call_method_get_pc)(itmp3)
+#else
+       lis     itmp3,L_jumptable_float@ha
+       addi    itmp3,itmp3,L_jumptable_float@l
+#endif
+
+       slwi    t2,t1,2                   /* multiple of 4-bytes                  */
+       add     itmp3,itmp3,t2            /* calculate address of jumptable       */
+       lwz     itmp3,0(itmp3)            /* load function address                */
+       mtctr   itmp3
+       addi    t1,t1,1                   /* float argument counter               */
+#if defined(__DARWIN__)
+       addi    t7,t7,1                   /* stack slot counter                   */
+       addi    t6,t6,1                   /* skip 1 integer argument register     */
+#endif
+       bctr
+
+L_register_handle_double:
+#if defined(__DARWIN__)
+       addis   itmp3,t3,ha16(L_jumptable_double - L_asm_vm_call_method_get_pc)
+       la      itmp3,lo16(L_jumptable_double - L_asm_vm_call_method_get_pc)(itmp3)
+#else
+       lis     itmp3,L_jumptable_double@ha
+       addi    itmp3,itmp3,L_jumptable_double@l
+#endif
+
+       slwi    t2,t1,2                   /* multiple of 4-bytes                  */
+       add     itmp3,itmp3,t2            /* calculate address of jumptable       */
+       lwz     itmp3,0(itmp3)            /* load function address                */
+       mtctr   itmp3
+       addi    t1,t1,1                   /* float argument counter               */
+#if defined(__DARWIN__)
+       addi    t7,t7,2                   /* stack slot counter                   */
+       addi    t6,t6,2                   /* skip 2 integer argument registers    */
+#endif
+       bctr
+
 L_register_copy_done:
+                                         /* calculate remaining arguments        */
+       sub     itmp3,t4,t0               /* - integer arguments in registers     */
+       sub     itmp3,itmp3,t1            /* - float arguments in registers       */
+       mr.     itmp3,itmp3
+       beq     L_stack_copy_done
+
+       mr      itmp2,t4                  /* restore argument count               */
+       mr      itmp1,t5                  /* restore argument block pointer       */
+
+       slwi    t4,itmp3,3                /* XXX use 8-bytes slots for now        */
+       addi    t4,t4,LA_SIZE             /* add size of linkage area             */
+
+#if defined(__DARWIN__)
+       slwi    t5,t7,2                   /* add stack space for arguments        */
+       add     t4,t4,t5
+#endif
+
+       sub     sp,sp,t4
+
+       mr      t6,sp                     /* use t6 as temporary sp               */
+       addi    t6,t6,LA_SIZE             /* skip linkage area                    */
+#if defined(__DARWIN__)
+       add     t6,t6,t5                  /* skip stack space for arguments       */
+#endif
+
+       addi    itmp1,itmp1,-sizevmarg    /* initialize pointer (smaller code)    */
+       addi    itmp2,itmp2,1             /* initialize argument count            */
+       
+L_stack_copy_loop:
+       addi    itmp1,itmp1,sizevmarg     /* goto next argument block             */
+       addi    itmp2,itmp2,-1            /* argument count - 1                   */
+       mr.     itmp2,itmp2
+       beq     L_stack_copy_done
+       
+       lwz     itmp3,offvmargtype+4(itmp1)
+       andi.   r0,itmp3,0x0002           /* is this a float/double type?         */
+       bne     L_stack_handle_float
+
+L_stack_handle_int:
+       addi    t0,t0,-1                  /* arguments assigned to registers      */
+       mr.     t0,t0
+       bge     L_stack_copy_loop
+
+       andi.   r0,itmp3,0x0001           /* is this a 2-word type?               */
+       bne     L_stack_handle_long
+
+       lwz     itmp3,offvmargdata+4(itmp1) /* get integer argument               */
+       stw     itmp3,0(t6)               /* and store it on the stack            */
+       addi    t6,t6,4                   /* increase temporary sp by 1 slot      */
+       b       L_stack_copy_loop
+
+L_stack_handle_long:
+#if !defined(__DARWIN__)
+       addi    t6,t6,4                   /* align stack to 8-bytes               */
+       rlwinm  t6,t6,0,30,28             /* clear lower 4-bits                   */
+#endif
+
+       lwz     itmp3,offvmargdata+0(itmp1) /* get long argument                  */
+       stw     itmp3,0(t6)               /* and store it on the stack            */
+       lwz     itmp3,offvmargdata+4(itmp1)
+       stw     itmp3,4(t6)
+       addi    t6,t6,8                   /* increase temporary sp by 2 slots     */
+       b       L_stack_copy_loop
+               
+L_stack_handle_float:
+       addi    t1,t1,-1                  /* arguments assigned to registers      */
+       mr.     t1,t1
+       bge     L_stack_copy_loop
+
+       andi.   r0,itmp3,0x0001           /* is this a 2-word type?               */
+       bne     L_stack_handle_double
+
+       lfs     ftmp3,offvmargdata(itmp1) /* get float argument                   */
+       stfs    ftmp3,0(t6)               /* and store it on the stack            */
+       addi    t6,t6,4                   /* increase temporary sp by 1 slot      */
+       b       L_stack_copy_loop
+
+L_stack_handle_double:
+#if !defined(__DARWIN__)
+       addi    t6,t6,4                   /* align stack to 8-bytes               */
+       rlwinm  t6,t6,0,30,28             /* clear lower 4-bits                   */
+#endif
+
+       lfd     ftmp3,offvmargdata(itmp1) /* get double argument                  */
+       stfd    ftmp3,0(t6)               /* and store it on the stack            */
+       addi    t6,t6,8                   /* increase temporary sp by 2 slots     */
+       b       L_stack_copy_loop
 
 L_stack_copy_done:
-       lwz     itmp1,9*4(sp)             /* pass method pointer via tmp1         */
+       lwz     itmp1,9*4(s0)             /* pass method pointer via tmp1         */
 
 #if defined(__DARWIN__)
        addis   mptr,t3,ha16(L_asm_call_jit_compiler - L_asm_vm_call_method_get_pc)
@@ -228,10 +381,10 @@ L_stack_copy_done:
        lis     mptr,L_asm_call_jit_compiler@ha
        addi    mptr,mptr,L_asm_call_jit_compiler@l
 #endif
-       stw     mptr,8*4(r1)
-       addi    mptr,r1,7*4
+       stw     mptr,7*4(s0)
+       addi    mptr,s0,7*4
 
-       lwz     pv,1*4(mptr)
+       lwz     pv,0*4(mptr)
        mtctr   pv
        bctrl
 1:
@@ -243,6 +396,10 @@ L_stack_copy_done:
 #endif
 
 L_asm_vm_call_method_return:
+       mr      sp,s0                     /* restore the function's sp            */
+
+       lwz     s0,8*4(sp)                /* restore used callee saved registers  */
+
 #if defined(__DARWIN__)
        lwz     itmp1,10*4(sp)            /* register r11 is callee saved         */
 #endif
@@ -253,31 +410,31 @@ L_asm_vm_call_method_return:
        lfd     ftmp2,16*4(sp)
 
 #if defined(__DARWIN__)
-       lwz     t1,18*4(r1)
-       lwz     t2,19*4(r1)
-       lwz     t3,20*4(r1)
-       lwz     t4,21*4(r1)
-       lwz     t5,22*4(r1)
-       lwz     t6,23*4(r1)
-       lwz     t7,24*4(r1)
-
-       lfd     ft0,26*4(r1)
-       lfd     ft1,28*4(r1)
-       lfd     ft2,30*4(r1)
-       lfd     ft3,32*4(r1)
-       lfd     ft4,34*4(r1)
-       lfd     ft5,36*4(r1)
+       lwz     t1,18*4(sp)
+       lwz     t2,19*4(sp)
+       lwz     t3,20*4(sp)
+       lwz     t4,21*4(sp)
+       lwz     t5,22*4(sp)
+       lwz     t6,23*4(sp)
+       lwz     t7,24*4(sp)
+
+       lfd     ft0,26*4(sp)
+       lfd     ft1,28*4(sp)
+       lfd     ft2,30*4(sp)
+       lfd     ft3,32*4(sp)
+       lfd     ft4,34*4(sp)
+       lfd     ft5,36*4(sp)
 #else
        RESTORE_TEMPORARY_REGISTERS(18)   /* the offset has to be even            */
 #endif
 
-       lwz     r0,40*4+LA_LR_OFFSET(r1)
+       lwz     r0,40*4+LA_LR_OFFSET(sp)
        mtlr    r0
-       addi    r1,r1,40*4
+       addi    sp,sp,40*4
        blr
 
 asm_vm_call_method_exception_handler:
-       mr      r3,itmp1
+       mr      a0,itmp1
        bl      builtin_throw_exception
        b       L_asm_vm_call_method_return
 
@@ -329,6 +486,13 @@ L_handle_a7:
 
 L_jumptable_long:
 #if defined(__DARWIN__)
+       .long   L_handle_a0_a1
+       .long   L_handle_a1_a2
+       .long   L_handle_a2_a3
+       .long   L_handle_a3_a4
+       .long   L_handle_a4_a5
+       .long   L_handle_a5_a6
+       .long   L_handle_a6_a7
 #else
        /* we have two entries here, so we get the even argument register
        alignment for linux */
@@ -340,7 +504,6 @@ L_jumptable_long:
        .long   L_handle_a4_a5
        .long   0
        .long   L_handle_a6_a7
-       .long   0
 #endif
 
        .text
@@ -350,20 +513,176 @@ L_handle_a0_a1:
        lwz     a0,offvmargdata+0(itmp1)
        lwz     a1,offvmargdata+4(itmp1)
        b       L_register_copy
+#if defined(__DARWIN__)
+L_handle_a1_a2:
+       lwz     a1,offvmargdata+0(itmp1)
+       lwz     a2,offvmargdata+4(itmp1)
+       b       L_register_copy
+#endif
 L_handle_a2_a3:
        lwz     a2,offvmargdata+0(itmp1)
        lwz     a3,offvmargdata+4(itmp1)
        b       L_register_copy
+#if defined(__DARWIN__)
+L_handle_a3_a4:
+       lwz     a3,offvmargdata+0(itmp1)
+       lwz     a4,offvmargdata+4(itmp1)
+       b       L_register_copy
+#endif
 L_handle_a4_a5:
        lwz     a4,offvmargdata+0(itmp1)
        lwz     a5,offvmargdata+4(itmp1)
        b       L_register_copy
+#if defined(__DARWIN__)
+L_handle_a5_a6:
+       lwz     a5,offvmargdata+0(itmp1)
+       lwz     a6,offvmargdata+4(itmp1)
+       b       L_register_copy
+#endif
 L_handle_a6_a7:
        lwz     a6,offvmargdata+0(itmp1)
        lwz     a7,offvmargdata+4(itmp1)
        b       L_register_copy
 
 
+       .data
+       .align  2
+
+L_jumptable_float:
+       .long   L_handle_fa0
+       .long   L_handle_fa1
+       .long   L_handle_fa2
+       .long   L_handle_fa3
+       .long   L_handle_fa4
+       .long   L_handle_fa5
+       .long   L_handle_fa6
+       .long   L_handle_fa7
+
+#if defined(__DARWIN__)
+       .long   L_handle_fa8
+       .long   L_handle_fa9
+       .long   L_handle_fa10
+       .long   L_handle_fa11
+       .long   L_handle_fa12
+#endif
+
+       .text
+       .align  2
+
+L_handle_fa0:
+       lfs     fa0,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa1:
+       lfs     fa1,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa2:
+       lfs     fa2,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa3:
+       lfs     fa3,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa4:
+       lfs     fa4,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa5:
+       lfs     fa5,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa6:
+       lfs     fa6,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa7:
+       lfs     fa7,offvmargdata(itmp1)
+       b       L_register_copy
+
+#if defined(__DARWIN__)
+L_handle_fa8:
+       lfs     fa8,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa9:
+       lfs     fa9,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa10:
+       lfs     fa10,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa11:
+       lfs     fa11,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fa12:
+       lfs     fa12,offvmargdata(itmp1)
+       b       L_register_copy
+#endif
+
+
+       .data
+       .align  2
+
+L_jumptable_double:
+       .long   L_handle_fda0
+       .long   L_handle_fda1
+       .long   L_handle_fda2
+       .long   L_handle_fda3
+       .long   L_handle_fda4
+       .long   L_handle_fda5
+       .long   L_handle_fda6
+       .long   L_handle_fda7
+
+#if defined(__DARWIN__)
+       .long   L_handle_fda8
+       .long   L_handle_fda9
+       .long   L_handle_fda10
+       .long   L_handle_fda11
+       .long   L_handle_fda12
+#endif
+
+       .text
+       .align  2
+
+L_handle_fda0:
+       lfd     fa0,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda1:
+       lfd     fa1,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda2:
+       lfd     fa2,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda3:
+       lfd     fa3,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda4:
+       lfd     fa4,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda5:
+       lfd     fa5,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda6:
+       lfd     fa6,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda7:
+       lfd     fa7,offvmargdata(itmp1)
+       b       L_register_copy
+
+#if defined(__DARWIN__)
+L_handle_fda8:
+       lfd     fa8,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda9:
+       lfd     fa9,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda10:
+       lfd     fa10,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda11:
+       lfd     fa11,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_fda12:
+       lfd     fa12,offvmargdata(itmp1)
+       b       L_register_copy
+#endif
+
+asm_vm_call_method_end:
+       nop
+
 /* asm_call_jit_compiler *******************************************************
 
    Invokes the compiler for untranslated JavaVM methods.
@@ -373,139 +692,92 @@ L_handle_a6_a7:
 asm_call_jit_compiler:
 L_asm_call_jit_compiler:                /* required for PIC code              */
        mflr    r0
-       stw     r0,LA_LR_OFFSET(r1)         /* save return address                */
-       stwu    r1,-((LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 3*4)+sizestackframeinfo)(r1)
-       stw     itmp1,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 1*4)(r1)
-
-       mr      itmp1,r0                    /* save return address to other reg.  */
-       lwz     itmp3,-12(itmp1)
-       srwi    itmp3,itmp3,16
-       andi.   itmp3,itmp3,31
-       cmpwi   itmp3,mptrn
-       beq     noregchange
-       lwz     itmp3,4(itmp1)
-       extsh   itmp3,itmp3
-       add     mptr,itmp3,itmp1
-       lwz     itmp3,8(itmp1)
-       srwi    itmp3,itmp3,16
-       cmpwi   itmp3,0x3dad
-       bne     noregchange
-       lwz     itmp3,8(itmp1)
-       slwi    itmp3,itmp3,16
-       add     mptr,mptr,itmp3
-               
-noregchange:
-       stw     mptr,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 2*4)(r1)
+       stw     r0,LA_LR_OFFSET(sp)         /* save return address                */
+       stwu    sp,-(LA_SIZE + 4*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)(sp)
 
 #if defined(__DARWIN__)
-       stw     a0,(LA_WORD_SIZE+5+0)*4(r1)
-       stw     a1,(LA_WORD_SIZE+5+1)*4(r1)
-       stw     a2,(LA_WORD_SIZE+5+2)*4(r1)
-       stw     a3,(LA_WORD_SIZE+5+3)*4(r1)
-       stw     a4,(LA_WORD_SIZE+5+4)*4(r1)
-       stw     a5,(LA_WORD_SIZE+5+5)*4(r1)
-       stw     a6,(LA_WORD_SIZE+5+6)*4(r1)
-       stw     a7,(LA_WORD_SIZE+5+7)*4(r1)
-
-       stfd    fa0,(LA_WORD_SIZE+5+8)*4(r1)
-       stfd    fa1,(LA_WORD_SIZE+5+10)*4(r1)
-       stfd    fa2,(LA_WORD_SIZE+5+12)*4(r1)
-       stfd    fa3,(LA_WORD_SIZE+5+14)*4(r1)
-       stfd    fa4,(LA_WORD_SIZE+5+16)*4(r1)
-       stfd    fa5,(LA_WORD_SIZE+5+18)*4(r1)
-       stfd    fa6,(LA_WORD_SIZE+5+20)*4(r1)
-       stfd    fa7,(LA_WORD_SIZE+5+22)*4(r1)
-       stfd    fa8,(LA_WORD_SIZE+5+24)*4(r1)
-       stfd    fa9,(LA_WORD_SIZE+5+26)*4(r1)
-       stfd    fa10,(LA_WORD_SIZE+5+28)*4(r1)
-       stfd    fa11,(LA_WORD_SIZE+5+30)*4(r1)
-       stfd    fa12,(LA_WORD_SIZE+5+32)*4(r1)
+       stw     a0,LA_SIZE+(4+0)*4(sp)
+       stw     a1,LA_SIZE+(4+1)*4(sp)
+       stw     a2,LA_SIZE+(4+2)*4(sp)
+       stw     a3,LA_SIZE+(4+3)*4(sp)
+       stw     a4,LA_SIZE+(4+4)*4(sp)
+       stw     a5,LA_SIZE+(4+5)*4(sp)
+       stw     a6,LA_SIZE+(4+6)*4(sp)
+       stw     a7,LA_SIZE+(4+7)*4(sp)
+
+       stfd    fa0,LA_SIZE+(4+8)*4(sp)
+       stfd    fa1,LA_SIZE+(4+10)*4(sp)
+       stfd    fa2,LA_SIZE+(4+12)*4(sp)
+       stfd    fa3,LA_SIZE+(4+14)*4(sp)
+       stfd    fa4,LA_SIZE+(4+16)*4(sp)
+       stfd    fa5,LA_SIZE+(4+18)*4(sp)
+       stfd    fa6,LA_SIZE+(4+20)*4(sp)
+       stfd    fa7,LA_SIZE+(4+22)*4(sp)
+       stfd    fa8,LA_SIZE+(4+24)*4(sp)
+       stfd    fa9,LA_SIZE+(4+26)*4(sp)
+       stfd    fa10,LA_SIZE+(4+28)*4(sp)
+       stfd    fa11,LA_SIZE+(4+30)*4(sp)
+       stfd    fa12,LA_SIZE+(4+32)*4(sp)
 #else
-       SAVE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1)
+       SAVE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS)
 #endif
 
-       addi    a0,sp,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 3*4)
-       li      a1,0                        /* we don't have pv handy             */
-       addi    a2,sp,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 3*4)+sizestackframeinfo
-       lwz     a3,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 3*4)+sizestackframeinfo+LA_LR_OFFSET(sp)
-       mr      a4,a3                       /* xpc is equal to ra                 */
-       bl      stacktrace_create_extern_stackframeinfo
-
-       lwz     a0,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 1*4)(r1)
-       bl      jit_compile                 /* compile the Java method            */
-       mr      pv,r3                       /* move address to pv register        */
-
-       addi    a0,sp,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 3*4)
-       bl      stacktrace_remove_stackframeinfo
+       mr      a0,itmp1
+       mr      a1,mptr
+       addi    a2,sp,(LA_SIZE + 4*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)
+       lwz     a3,(LA_SIZE + 4*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)+LA_LR_OFFSET(sp)
+       bl      jit_asm_compile
+       mr      pv,v0                       /* move address to pv register        */
 
 #if defined(__DARWIN__)
-       lwz     a0,(LA_WORD_SIZE+5+0)*4(r1)
-       lwz     a1,(LA_WORD_SIZE+5+1)*4(r1)
-       lwz     a2,(LA_WORD_SIZE+5+2)*4(r1)
-       lwz     a3,(LA_WORD_SIZE+5+3)*4(r1)
-       lwz     a4,(LA_WORD_SIZE+5+4)*4(r1)
-       lwz     a5,(LA_WORD_SIZE+5+5)*4(r1)
-       lwz     a6,(LA_WORD_SIZE+5+6)*4(r1)
-       lwz     a7,(LA_WORD_SIZE+5+7)*4(r1)
-
-       lfd     fa0,(LA_WORD_SIZE+5+8)*4(r1)
-       lfd     fa1,(LA_WORD_SIZE+5+10)*4(r1)
-       lfd     fa2,(LA_WORD_SIZE+5+12)*4(r1)
-       lfd     fa3,(LA_WORD_SIZE+5+14)*4(r1)
-       lfd     fa4,(LA_WORD_SIZE+5+16)*4(r1)
-       lfd     fa5,(LA_WORD_SIZE+5+18)*4(r1)
-       lfd     fa6,(LA_WORD_SIZE+5+20)*4(r1)
-       lfd     fa7,(LA_WORD_SIZE+5+22)*4(r1)
-       lfd     fa8,(LA_WORD_SIZE+5+24)*4(r1)
-       lfd     fa9,(LA_WORD_SIZE+5+26)*4(r1)
-       lfd     fa10,(LA_WORD_SIZE+5+28)*4(r1)
-       lfd     fa11,(LA_WORD_SIZE+5+30)*4(r1)
-       lfd     fa12,(LA_WORD_SIZE+5+32)*4(r1)
+       lwz     a0,LA_SIZE+(4+0)*4(sp)
+       lwz     a1,LA_SIZE+(4+1)*4(sp)
+       lwz     a2,LA_SIZE+(4+2)*4(sp)
+       lwz     a3,LA_SIZE+(4+3)*4(sp)
+       lwz     a4,LA_SIZE+(4+4)*4(sp)
+       lwz     a5,LA_SIZE+(4+5)*4(sp)
+       lwz     a6,LA_SIZE+(4+6)*4(sp)
+       lwz     a7,LA_SIZE+(4+7)*4(sp)
+
+       lfd     fa0,LA_SIZE+(4+8)*4(sp)
+       lfd     fa1,LA_SIZE+(4+10)*4(sp)
+       lfd     fa2,LA_SIZE+(4+12)*4(sp)
+       lfd     fa3,LA_SIZE+(4+14)*4(sp)
+       lfd     fa4,LA_SIZE+(4+16)*4(sp)
+       lfd     fa5,LA_SIZE+(4+18)*4(sp)
+       lfd     fa6,LA_SIZE+(4+20)*4(sp)
+       lfd     fa7,LA_SIZE+(4+22)*4(sp)
+       lfd     fa8,LA_SIZE+(4+24)*4(sp)
+       lfd     fa9,LA_SIZE+(4+26)*4(sp)
+       lfd     fa10,LA_SIZE+(4+28)*4(sp)
+       lfd     fa11,LA_SIZE+(4+30)*4(sp)
+       lfd     fa12,LA_SIZE+(4+32)*4(sp)
 #else
-       RESTORE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1)
+       RESTORE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS)
 #endif
 
-       lwz     mptr,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 2*4)(r1)
-
-       lwz     itmp1,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 3*4)+sizestackframeinfo+LA_LR_OFFSET(r1)
+       lwz     itmp1,(LA_SIZE + 4*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)+LA_LR_OFFSET(sp)
        mtlr    itmp1
-       addi    r1,r1,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8 + 3*4)+sizestackframeinfo
+
+       addi    sp,sp,(LA_SIZE + 4*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)
 
        mr.     pv,pv                       /* test for exception                 */
        beq     L_asm_call_jit_compiler_exception
 
-       lwz     itmp3,-12(itmp1)
-       extsh   itmp3,itmp3
-       add     mptr,mptr,itmp3
-       stw     pv,0(mptr)                  /* store method address               */
-
        mtctr   pv                          /* move method address to control reg */
        bctr                                /* and call the Java method           */
 
 L_asm_call_jit_compiler_exception:
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
        mflr    r0
        stw     r0,LA_LR_OFFSET(sp)
        stwu    sp,-LA_SIZE_ALIGNED(sp)     /* preserve linkage area              */
-       bl      builtin_asm_get_exceptionptrptr
-       lwz     r0,LA_SIZE_ALIGNED+LA_LR_OFFSET(sp)
-       mtlr    r0      
+       bl      exceptions_get_and_clear_exception
+       lwz     xpc,LA_SIZE_ALIGNED+LA_LR_OFFSET(sp)
+       mtlr    xpc     
        addi    sp,sp,LA_SIZE_ALIGNED
-#else
-# if defined(__DARWIN__)
-       lis     v0,ha16(_no_threads_exceptionptr)
-       addi    v0,v0,lo16(_no_threads_exceptionptr)
-# else
-       lis     v0,_no_threads_exceptionptr@ha
-       addi    v0,v0,_no_threads_exceptionptr@l
-# endif
-#endif
-       lwz     xptr,0(v0)                  /* get the exception pointer          */
-       li      itmp3,0
-       stw     itmp3,0(v0)                 /* clear the exception pointer        */
 
-       mflr    xpc
-       addi    xpc,xpc,-4
+       mr      xptr,v0                     /* get exception                      */
+       addi    xpc,xpc,-4                  /* exception address is ra - 4        */
        b       L_asm_handle_nat_exception
 
 
@@ -523,17 +795,25 @@ L_asm_call_jit_compiler_exception:
                
 asm_handle_nat_exception:
 L_asm_handle_nat_exception:             /* required for PIC code              */
-       mflr    r9
-       lwz     itmp3,4(r9)
-       extsh   itmp3,itmp3
-       add     pv,itmp3,r9
-       lwz     itmp3,8(r9)
-       srwi    itmp3,itmp3,16
-       cmpwi   itmp3,0x3dad
-       bne     L_asm_handle_exception
-       lwz     itmp3,8(r9)
-       slwi    itmp3,itmp3,16
-       add     pv,pv,itmp3
+L_asm_handle_exception_stack_loop:
+       mflr    r0
+       addi    sp,sp,-(LA_SIZE+((4+6)*4))  /* allocate stack (+4 for darwin)     */
+       stw     xptr,LA_SIZE+(4+0)*4(sp)    /* save exception pointer             */
+       stw     xpc,LA_SIZE+(4+1)*4(sp)     /* save exception pc                  */
+       stw     r0,LA_SIZE+(4+3)*4(sp)      /* save return address                */
+       li      itmp3,0
+       stw     itmp3,LA_SIZE+(4+4)*4(sp)   /* save maybe-leaf flag (cleared)     */
+
+       mr      a0,r0                       /* pass return address                */
+       bl      md_codegen_get_pv_from_pc   /* get PV from RA                     */
+       stw     v0,LA_SIZE+(4+2)*4(sp)      /* save data segment pointer          */
+
+       lwz     a0,LA_SIZE+(4+0)*4(sp)      /* pass xptr                          */
+       lwz     a1,LA_SIZE+(4+1)*4(sp)      /* pass xpc                           */
+       lwz     a2,LA_SIZE+(4+2)*4(sp)      /* pass PV (v0 == a0)                 */
+       addi    a3,sp,LA_SIZE+((4+6)*4)     /* pass Java SP                       */
+
+       b       L_asm_handle_exception_continue
 
 asm_handle_exception:
 L_asm_handle_exception:                 /* required for PIC code              */
@@ -545,38 +825,34 @@ L_asm_handle_exception:                 /* required for PIC code              */
        SAVE_TEMPORARY_REGISTERS(ARG_CNT)   /* case this is a leaf method         */
 #endif
 
-       li      a3,(ARG_CNT+TMP_CNT)*8      /* prepare a3 for handle_exception    */
-       li      a4,1                        /* set maybe-leaf flag                */
-
-L_asm_handle_exception_stack_loop:
-       addi    sp,sp,-(LA_WORD_SIZE+4+5)*4 /* allocate stack                     */
-       stw     xptr,LA_SIZE+4*4(sp)        /* save exception pointer             */
-       stw     xpc,LA_SIZE+5*4(sp)         /* save exception pc                  */
-       stw     pv,LA_SIZE+6*4(sp)          /* save data segment pointer          */
-       mflr    r0                          /* save return address                */
-       stw     r0,LA_SIZE+5*4(sp)
-       add     a3,a3,sp                    /* calculate Java sp into a3...       */
-       addi    a3,a3,(LA_WORD_SIZE+4+5)*4
-       stw     a4,LA_SIZE+8*4(sp)          /* save maybe-leaf flag               */
+       addi    sp,sp,-(LA_SIZE+(4+6)*4)    /* allocate stack                     */
+       stw     xptr,LA_SIZE+(4+0)*4(sp)    /* save xptr                          */
+       stw     pv,LA_SIZE+(4+2)*4(sp)      /* save PV                            */
+       mflr    r0                          /* save RA                            */
+       stw     r0,LA_SIZE+(4+3)*4(sp)
+       li      t0,1                        /* set maybe-leaf flag                */
+       stw     t0,LA_SIZE+(4+4)*4(sp)      /* save maybe-leaf flag               */
 
        mr      a0,xptr                     /* pass exception pointer             */
        mr      a1,xpc                      /* pass exception pc                  */
        mr      a2,pv                       /* pass data segment pointer          */
-                                           /* a3 is still set                    */
+       addi    a3,sp,LA_SIZE+(ARG_CNT+TMP_CNT)*8+(4+6)*4
+
+L_asm_handle_exception_continue:
        bl      exceptions_handle_exception
 
        mr.     v0,v0
        beq     L_asm_handle_exception_not_catched
 
        mr      xpc,v0                      /* move handlerpc into xpc            */
-       lwz     xptr,LA_SIZE+4*4(sp)        /* restore exception pointer          */
-       lwz     pv,LA_SIZE+6*4(sp)          /* restore data segment pointer       */
-       lwz     r0,LA_SIZE+5*4(sp)          /* restore return address             */
+       lwz     xptr,LA_SIZE+(4+0)*4(sp)    /* restore xptr                       */
+       lwz     pv,LA_SIZE+(4+2)*4(sp)      /* restore PV                         */
+       lwz     r0,LA_SIZE+(4+3)*4(sp)      /* restore RA                         */
        mtlr    r0
-       lwz     a4,LA_SIZE+8*4(sp)          /* get maybe-leaf flag                */
-       addi    sp,sp,(LA_WORD_SIZE+4+5)*4  /* free stack frame                   */
+       lwz     t0,LA_SIZE+(4+4)*4(sp)      /* get maybe-leaf flag                */
+       addi    sp,sp,LA_SIZE+(4+6)*4       /* free stack frame                   */
 
-       mr.     a4,a4
+       mr.     t0,t0
        beq     L_asm_handle_exception_no_leaf
 
 #if defined(__DARWIN__)
@@ -592,106 +868,117 @@ L_asm_handle_exception_no_leaf:
        bctr
 
 L_asm_handle_exception_not_catched:
-       lwz     xptr,LA_SIZE+4*4(sp)        /* restore exception pointer          */
-       lwz     pv,LA_SIZE+6*4(sp)          /* restore data segment pointer       */
-       lwz     r0,LA_SIZE+5*4(sp)          /* restore return address             */
+       lwz     xptr,LA_SIZE+(4+0)*4(sp)    /* restore xptr                       */
+       lwz     pv,LA_SIZE+(4+2)*4(sp)      /* restore PV                         */
+       lwz     r0,LA_SIZE+(4+3)*4(sp)      /* restore RA                         */
        mtlr    r0
-       lwz     a4,LA_SIZE+8*4(sp)          /* get maybe-leaf flag                */
-       addi    sp,sp,(LA_WORD_SIZE+4+5)*4  /* free stack frame                   */
+       lwz     t0,LA_SIZE+(4+4)*4(sp)      /* get maybe-leaf flag                */
+       addi    sp,sp,LA_SIZE+(4+6)*4       /* free stack frame                   */
 
-       mr.     a4,a4
+       mr.     t0,t0
        beq     L_asm_handle_exception_no_leaf_stack
 
        addi    sp,sp,(ARG_CNT+TMP_CNT)*8   /* remove maybe-leaf stackframe       */
-       li      a4,0                        /* clear the maybe-leaf flag          */
+       li      t0,0                        /* clear the maybe-leaf flag          */
 
 L_asm_handle_exception_no_leaf_stack:
-       lwz     t0,FrameSize(pv)            /* get frame size                     */
-       add     t0,sp,t0                    /* pointer to save area               */
+       lwz     t1,FrameSize(pv)            /* get frame size                     */
+       add     t1,sp,t1                    /* pointer to save area               */
 
-       lwz     t1,IsLeaf(pv)               /* is leaf procedure                  */
-       mr.     t1,t1
+       lwz     t2,IsLeaf(pv)               /* is leaf procedure                  */
+       mr.     t2,t2
        bne     L_asm_handle_exception_no_ra_restore
 
-       lwz     r0,LA_LR_OFFSET(t0)         /* restore ra                         */
+       lwz     r0,LA_LR_OFFSET(t1)         /* restore ra                         */
        mtlr    r0
 
 L_asm_handle_exception_no_ra_restore:
        mflr    xpc                         /* the new xpc is ra                  */
-       lwz     t1,IntSave(pv)              /* t1 = saved int register count      */
+       mr      t4,xpc                      /* save RA                            */
+       lwz     t2,IntSave(pv)              /* t2 = saved int register count      */
        bl      ex_int1
 ex_int1:
-       mflr    t2                          /* t2 = current pc                    */
+       mflr    t3                          /* t3 = current pc                    */
 #if defined(__DARWIN__)
-       addi    t2,t2,lo16(ex_int2-ex_int1)
+       addi    t3,t3,lo16(ex_int2-ex_int1)
 #else
-       addi    t2,t2,(ex_int2-ex_int1)@l
+       addi    t3,t3,(ex_int2-ex_int1)@l
 #endif
-       slwi    t1,t1,2                     /* t1 = register count * 4            */
-       subf    t2,t1,t2                    /* t2 = IntSave - t1                  */
-       mtctr   t2
+       slwi    t2,t2,2                     /* t2 = register count * 4            */
+       subf    t3,t2,t3                    /* t3 = IntSave - t2                  */
+       mtctr   t3
        bctr
 
-       lwz     s0,-10*4(t0)
-       lwz     s1,-9*4(t0)
-       lwz     s2,-8*4(t0)
-       lwz     s3,-7*4(t0)
-       lwz     s4,-6*4(t0)
-       lwz     s5,-5*4(t0)
-       lwz     s6,-4*4(t0)
-       lwz     s7,-3*4(t0)
-       lwz     s8,-2*4(t0)
-       lwz     s9,-1*4(t0)
+       lwz     s0,-10*4(t1)
+       lwz     s1,-9*4(t1)
+       lwz     s2,-8*4(t1)
+       lwz     s3,-7*4(t1)
+       lwz     s4,-6*4(t1)
+       lwz     s5,-5*4(t1)
+       lwz     s6,-4*4(t1)
+       lwz     s7,-3*4(t1)
+       lwz     s8,-2*4(t1)
+       lwz     s9,-1*4(t1)
 
 ex_int2:
-       subf    t0,t1,t0                    /* t0 = t0 - register count * 4       */
+       subf    t1,t2,t1                    /* t1 = t1 - register count * 4       */
 
-       lwz     t1,FltSave(pv)
+       lwz     t2,FltSave(pv)
        bl      ex_flt1
 ex_flt1:
-       mflr    t2
+       mflr    t3
 #if defined(__DARWIN__)
-       addi    t2,t2,lo16(ex_flt2-ex_flt1)
+       addi    t3,t3,lo16(ex_flt2-ex_flt1)
 #else
-       addi    t2,t2,(ex_flt2-ex_flt1)@l
+       addi    t3,t3,(ex_flt2-ex_flt1)@l
 #endif
-       slwi    t1,t1,2                     /* t1 = register count * 4            */
-       subf    t2,t1,t2                    /* t2 = FltSave - t1                  */
-       mtctr   t2
+       slwi    t2,t2,2                     /* t2 = register count * 4            */
+       subf    t3,t2,t3                    /* t3 = FltSave - t2                  */
+       mtctr   t3
        bctr
 
-       lfd     fs0,-10*8(t0)
-       lfd     fs1,-9*8(t0)
-       lfd     fs2,-8*8(t0)
-       lfd     fs3,-7*8(t0)
-       lfd     fs4,-6*8(t0)
-       lfd     fs5,-5*8(t0)
-       lfd     fs6,-4*8(t0)
-       lfd     fs7,-3*8(t0)
-       lfd     fs8,-2*8(t0)
-       lfd     fs9,-1*8(t0)
+       lfd     fs0,-10*8(t1)
+       lfd     fs1,-9*8(t1)
+       lfd     fs2,-8*8(t1)
+       lfd     fs3,-7*8(t1)
+       lfd     fs4,-6*8(t1)
+       lfd     fs5,-5*8(t1)
+       lfd     fs6,-4*8(t1)
+       lfd     fs7,-3*8(t1)
+       lfd     fs8,-2*8(t1)
+       lfd     fs9,-1*8(t1)
 
 ex_flt2:
-       lwz     t0,FrameSize(pv)            /* get frame size                     */
-       add     sp,sp,t0                    /* unwind stack                       */
-       li      a3,0                        /* prepare a3 for handle_exception    */
-
-       mtlr    xpc
-       lwz     itmp3,4(xpc)
-       extsh   itmp3,itmp3
-       add     pv,itmp3,xpc
-       lwz     itmp3,8(xpc)
-       srwi    itmp3,itmp3,16
-       cmpwi   itmp3,0x3dad
-       bne     L_asm_handle_exception_stack_loop
-       lwz     itmp3,8(xpc)
-       slwi    itmp3,itmp3,16
-       add     pv,pv,itmp3
-
+       mtlr    t4                          /* restore RA                         */
+       lwz     t1,FrameSize(pv)            /* get frame size                     */
+       add     sp,sp,t1                    /* unwind stack                       */
        b       L_asm_handle_exception_stack_loop
 
 
-/* asm_wrapper_patcher *********************************************************
+/* asm_abstractmethoderror *****************************************************
+
+   Creates and throws an AbstractMethodError.
+
+*******************************************************************************/
+
+asm_abstractmethoderror:
+       mflr    r0
+       stw     r0,LA_LR_OFFSET(sp)
+       stwu    sp,-LA_SIZE_ALIGNED(sp)     /* preserve linkage area              */
+       addi    a0,sp,LA_SIZE_ALIGNED       /* pass java sp                       */
+       mr      a1,r0                       /* pass exception address             */
+       bl      exceptions_asm_new_abstractmethoderror
+       lwz     r0,LA_SIZE_ALIGNED+LA_LR_OFFSET(sp)
+       mtlr    r0                          /* restore return address             */
+       addi    sp,sp,LA_SIZE_ALIGNED
+
+       mr      xptr,v0                     /* get exception pointer              */
+       mr      xpc,r0                      /* we can't use r0 directly in addi   */
+       addi    xpc,xpc,-4                  /* exception address is ra - 4        */
+       b       L_asm_handle_nat_exception
+
+
+/* asm_patcher_wrapper *********************************************************
 
    XXX
 
@@ -705,21 +992,21 @@ ex_flt2:
 
 *******************************************************************************/
 
-asm_wrapper_patcher:
+asm_patcher_wrapper:
        mflr    r0                    /* get Java return address (leaf)           */
        stw     r0,6*4(sp)            /* store it in the stub stackframe          */
                                      /* keep stack 16-bytes aligned: 6+1+37 = 44 */
-       stwu    sp,-(LA_SIZE+(5+58)*4+sizestackframeinfo)(sp)
+       stwu    sp,-(LA_SIZE+(5+58)*4)(sp)
 
 #if defined(__DARWIN__)
-       stw     a0,LA_SIZE+(5+0)*4(r1)      /* save argument registers            */
-       stw     a1,LA_SIZE+(5+1)*4(r1)      /* preserve linkage area (24 bytes)   */
-       stw     a2,LA_SIZE+(5+2)*4(r1)      /* and 4 bytes for 4 argument         */
-       stw     a3,LA_SIZE+(5+3)*4(r1)
-       stw     a4,LA_SIZE+(5+4)*4(r1)
-       stw     a5,LA_SIZE+(5+5)*4(r1)
-       stw     a6,LA_SIZE+(5+6)*4(r1)
-       stw     a7,LA_SIZE+(5+7)*4(r1)
+       stw     a0,LA_SIZE+(5+0)*4(sp)      /* save argument registers            */
+       stw     a1,LA_SIZE+(5+1)*4(sp)      /* preserve linkage area (24 bytes)   */
+       stw     a2,LA_SIZE+(5+2)*4(sp)      /* and 4 bytes for 4 argument         */
+       stw     a3,LA_SIZE+(5+3)*4(sp)
+       stw     a4,LA_SIZE+(5+4)*4(sp)
+       stw     a5,LA_SIZE+(5+5)*4(sp)
+       stw     a6,LA_SIZE+(5+6)*4(sp)
+       stw     a7,LA_SIZE+(5+7)*4(sp)
 
        stfd    fa0,LA_SIZE+(5+8)*4(sp)
        stfd    fa1,LA_SIZE+(5+10)*4(sp)
@@ -735,57 +1022,46 @@ asm_wrapper_patcher:
        stfd    fa11,LA_SIZE+(5+30)*4(sp)
        stfd    fa12,LA_SIZE+(5+32)*4(sp)
 
-       stw     t0,(LA_WORD_SIZE+5+33)*4(r1)
-       stw     t1,(LA_WORD_SIZE+5+34)*4(r1)
-       stw     t2,(LA_WORD_SIZE+5+35)*4(r1)
-       stw     t3,(LA_WORD_SIZE+5+36)*4(r1)
-       stw     t4,(LA_WORD_SIZE+5+37)*4(r1)
-       stw     t5,(LA_WORD_SIZE+5+38)*4(r1)
-       stw     t6,(LA_WORD_SIZE+5+39)*4(r1)
-       stw     t7,(LA_WORD_SIZE+5+40)*4(r1)
-
-       stfd    ft0,(LA_WORD_SIZE+5+42)*4(r1)
-       stfd    ft1,(LA_WORD_SIZE+5+44)*4(r1)
-       stfd    ft2,(LA_WORD_SIZE+5+46)*4(r1)
-       stfd    ft3,(LA_WORD_SIZE+5+48)*4(r1)
-       stfd    ft4,(LA_WORD_SIZE+5+50)*4(r1)
-       stfd    ft5,(LA_WORD_SIZE+5+52)*4(r1)
+       stw     t0,LA_SIZE+(5+33)*4(sp)
+       stw     t1,LA_SIZE+(5+34)*4(sp)
+       stw     t2,LA_SIZE+(5+35)*4(sp)
+       stw     t3,LA_SIZE+(5+36)*4(sp)
+       stw     t4,LA_SIZE+(5+37)*4(sp)
+       stw     t5,LA_SIZE+(5+38)*4(sp)
+       stw     t6,LA_SIZE+(5+39)*4(sp)
+       stw     t7,LA_SIZE+(5+40)*4(sp)
+
+       stfd    ft0,LA_SIZE+(5+42)*4(sp)
+       stfd    ft1,LA_SIZE+(5+44)*4(sp)
+       stfd    ft2,LA_SIZE+(5+46)*4(sp)
+       stfd    ft3,LA_SIZE+(5+48)*4(sp)
+       stfd    ft4,LA_SIZE+(5+50)*4(sp)
+       stfd    ft5,LA_SIZE+(5+52)*4(sp)
 #else
-       SAVE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1) /* save 8 int/8 float arguments   */
-       SAVE_TEMPORARY_REGISTERS(LA_WORD_SIZE+1+24)
+       /* save 8 int/8 float arguments */
+       SAVE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS+1)
+       SAVE_TEMPORARY_REGISTERS(LA_SIZE_IN_POINTERS+1+24)
 #endif
 
        stw     itmp1,LA_SIZE+(5+54)*4(sp)
        stw     itmp2,LA_SIZE+(5+55)*4(sp)
        stw     pv,LA_SIZE+(5+56)*4(sp)
 
-       addi    a0,sp,LA_SIZE+(5+58)*4      /* create stackframe info             */
-       mr      a1,pv
-       addi    a2,sp,(8+LA_WORD_SIZE+5+58)*4+sizestackframeinfo
-       mr      a3,r0                       /* this is correct for leafs          */
-       lwz     a4,((5+LA_WORD_SIZE+5+58)*4+sizestackframeinfo)(sp) /* pass xpc   */
-       bl      stacktrace_create_extern_stackframeinfo
-
-       addi    a0,sp,(0+LA_WORD_SIZE+5+58)*4+sizestackframeinfo  /* pass sp      */
-       lwz     pv,(0+LA_WORD_SIZE+5+58)*4+sizestackframeinfo(sp) /* get function */
-       lwz     itmp1,LA_SIZE+(5+56)*4(sp)  /* move pv to position of fp          */
-       stw     itmp1,(0+LA_WORD_SIZE+5+58)*4+sizestackframeinfo(sp)
-       mtctr   pv                          /* call the patcher function          */
-       bctrl
+       addi    a0,sp,LA_SIZE+(5+58)*4      /* pass SP of patcher stub            */
+       mr      a1,pv                       /* pass PV                            */
+       mr      a2,r0                       /* pass RA (correct for leafs)        */
+       bl      patcher_wrapper
        stw     v0,LA_SIZE+(5+57)*4(sp)     /* save return value                  */
 
-       addi    a0,sp,LA_SIZE+(5+58)*4
-       bl      stacktrace_remove_stackframeinfo /* remove stackframe info        */
-
 #if defined(__DARWIN__)
-       lwz     a0,LA_SIZE+(5+0)*4(r1)
-       lwz     a1,LA_SIZE+(5+1)*4(r1)
-       lwz     a2,LA_SIZE+(5+2)*4(r1)
-       lwz     a3,LA_SIZE+(5+3)*4(r1)
-       lwz     a4,LA_SIZE+(5+4)*4(r1)
-       lwz     a5,LA_SIZE+(5+5)*4(r1)
-       lwz     a6,LA_SIZE+(5+6)*4(r1)
-       lwz     a7,LA_SIZE+(5+7)*4(r1)
+       lwz     a0,LA_SIZE+(5+0)*4(sp)
+       lwz     a1,LA_SIZE+(5+1)*4(sp)
+       lwz     a2,LA_SIZE+(5+2)*4(sp)
+       lwz     a3,LA_SIZE+(5+3)*4(sp)
+       lwz     a4,LA_SIZE+(5+4)*4(sp)
+       lwz     a5,LA_SIZE+(5+5)*4(sp)
+       lwz     a6,LA_SIZE+(5+6)*4(sp)
+       lwz     a7,LA_SIZE+(5+7)*4(sp)
 
        lfd     fa0,LA_SIZE+(5+8)*4(sp)
        lfd     fa1,LA_SIZE+(5+10)*4(sp)
@@ -801,24 +1077,25 @@ asm_wrapper_patcher:
        lfd     fa11,LA_SIZE+(5+30)*4(sp)
        lfd     fa12,LA_SIZE+(5+32)*4(sp)
 
-       lwz     t0,(LA_WORD_SIZE+5+33)*4(r1)
-       lwz     t1,(LA_WORD_SIZE+5+34)*4(r1)
-       lwz     t2,(LA_WORD_SIZE+5+35)*4(r1)
-       lwz     t3,(LA_WORD_SIZE+5+36)*4(r1)
-       lwz     t4,(LA_WORD_SIZE+5+37)*4(r1)
-       lwz     t5,(LA_WORD_SIZE+5+38)*4(r1)
-       lwz     t6,(LA_WORD_SIZE+5+39)*4(r1)
-       lwz     t7,(LA_WORD_SIZE+5+40)*4(r1)
-
-       lfd     ft0,(LA_WORD_SIZE+5+42)*4(r1)
-       lfd     ft1,(LA_WORD_SIZE+5+44)*4(r1)
-       lfd     ft2,(LA_WORD_SIZE+5+46)*4(r1)
-       lfd     ft3,(LA_WORD_SIZE+5+48)*4(r1)
-       lfd     ft4,(LA_WORD_SIZE+5+50)*4(r1)
-       lfd     ft5,(LA_WORD_SIZE+5+52)*4(r1)
+       lwz     t0,LA_SIZE+(5+33)*4(sp)
+       lwz     t1,LA_SIZE+(5+34)*4(sp)
+       lwz     t2,LA_SIZE+(5+35)*4(sp)
+       lwz     t3,LA_SIZE+(5+36)*4(sp)
+       lwz     t4,LA_SIZE+(5+37)*4(sp)
+       lwz     t5,LA_SIZE+(5+38)*4(sp)
+       lwz     t6,LA_SIZE+(5+39)*4(sp)
+       lwz     t7,LA_SIZE+(5+40)*4(sp)
+
+       lfd     ft0,LA_SIZE+(5+42)*4(sp)
+       lfd     ft1,LA_SIZE+(5+44)*4(sp)
+       lfd     ft2,LA_SIZE+(5+46)*4(sp)
+       lfd     ft3,LA_SIZE+(5+48)*4(sp)
+       lfd     ft4,LA_SIZE+(5+50)*4(sp)
+       lfd     ft5,LA_SIZE+(5+52)*4(sp)
 #else
-       RESTORE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1) /* restore 8 int/8 float args  */
-       RESTORE_TEMPORARY_REGISTERS(LA_WORD_SIZE+1+24)
+       /* restore 8 int/8 float arguments */
+       RESTORE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS+1)
+       RESTORE_TEMPORARY_REGISTERS(LA_SIZE_IN_POINTERS+1+24)
 #endif
 
        lwz     itmp1,LA_SIZE+(5+54)*4(sp)
@@ -826,50 +1103,28 @@ asm_wrapper_patcher:
        lwz     pv,LA_SIZE+(5+56)*4(sp)
        lwz     itmp3,LA_SIZE+(5+57)*4(sp)  /* restore return value into temp reg.*/
 
-       lwz     r0,(6+LA_WORD_SIZE+5+58)*4+sizestackframeinfo(sp) /* restore RA   */
+       lwz     r0,6*4+LA_SIZE+(5+58)*4(sp) /* restore RA                         */
        mtlr    r0
 
        mr.     itmp3,itmp3           /* check for an exception                   */
-       beq     L_asm_wrapper_patcher_exception
+       bne     L_asm_patcher_wrapper_exception
 
                                      /* get return address (into JIT code)       */
-       lwz     itmp3,(5+LA_WORD_SIZE+5+58)*4+sizestackframeinfo(sp)
+       lwz     itmp3,5*4+LA_SIZE+(5+58)*4(sp)
 
                                      /* remove stack frame + patcher stub stack  */
-       addi    sp,sp,(8+LA_WORD_SIZE+5+58)*4+sizestackframeinfo
+       addi    sp,sp,8*4+LA_SIZE+(5+58)*4
 
        mtctr   itmp3
        bctr                          /* jump to new patched code                 */
 
-L_asm_wrapper_patcher_exception:
-       lwz     xpc,(5+LA_WORD_SIZE+5+58)*4+sizestackframeinfo(sp)
-       addi    sp,sp,(8+LA_WORD_SIZE+5+58)*4+sizestackframeinfo
-
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
-       mflr    r0
-       stw     r0,LA_LR_OFFSET(sp)
-       stwu    sp,-(LA_SIZE+1*4)(sp) /* preserve linkage area                    */
-       stw     xpc,LA_SIZE+0*4(sp)
-       bl      builtin_asm_get_exceptionptrptr
-       lwz     xpc,LA_SIZE+0*4(sp)
-       lwz     r0,LA_SIZE+1*4+LA_LR_OFFSET(sp)
-       mtlr    r0
-       addi    sp,sp,LA_SIZE+1*4
-#else
-# if defined(__DARWIN__)
-       lis     v0,ha16(_no_threads_exceptionptr)
-       addi    v0,v0,lo16(_no_threads_exceptionptr)
-# else
-       lis     v0,_no_threads_exceptionptr@ha
-       addi    v0,v0,_no_threads_exceptionptr@l
-# endif
-#endif
-
-       lwz     xptr,0(v0)            /* get the exception pointer                */
-       li      itmp3,0
-       stw     itmp3,0(v0)           /* clear the exception pointer              */
+L_asm_patcher_wrapper_exception:
+       mr      xptr,itmp3                  /* get exception                      */
+       lwz     xpc,5*4+LA_SIZE+(5+58)*4(sp)
+       addi    sp,sp,8*4+LA_SIZE+(5+58)*4
        b       L_asm_handle_exception
 
+#if defined(ENABLE_REPLACEMENT)
 
 /* asm_replacement_out *********************************************************
 
@@ -892,46 +1147,48 @@ L_asm_wrapper_patcher_exception:
        /* XXX we should find a cleaner solution here */
 #define REPLACEMENT_ROOM  512
 
+#define sizeexecutionstate_ALIGNED  ((sizeexecutionstate + 15) & ~15)
+
 asm_replacement_out:
     /* create stack frame */
-       addi    sp,sp,-(sizeexecutionstate + REPLACEMENT_ROOM) /* XXX align */
+       addi    sp,sp,-(sizeexecutionstate_ALIGNED + REPLACEMENT_ROOM)
 
        /* save link register */
-       mflr    r16
+       mflr    itmp3
 
        /* save registers in execution state */
-       stw     r0 ,( 0*8+offes_intregs)(sp)
-       stw     r1 ,( 1*8+offes_intregs)(sp)
-       stw     r2 ,( 2*8+offes_intregs)(sp)
-       stw     r3 ,( 3*8+offes_intregs)(sp)
-       stw     r4 ,( 4*8+offes_intregs)(sp)
-       stw     r5 ,( 5*8+offes_intregs)(sp)
-       stw     r6 ,( 6*8+offes_intregs)(sp)
-       stw     r7 ,( 7*8+offes_intregs)(sp)
-       stw     r8 ,( 8*8+offes_intregs)(sp)
-       stw     r9 ,( 9*8+offes_intregs)(sp)
-       stw     r10,(10*8+offes_intregs)(sp)
-       stw     r11,(11*8+offes_intregs)(sp)
-       stw     r12,(12*8+offes_intregs)(sp)
-       stw     r13,(13*8+offes_intregs)(sp)
-       stw     r14,(14*8+offes_intregs)(sp)
-       stw     r15,(15*8+offes_intregs)(sp)
-       stw     r16,(16*8+offes_intregs)(sp) /* link register */
-       stw     r17,(17*8+offes_intregs)(sp)
-       stw     r18,(18*8+offes_intregs)(sp)
-       stw     r19,(19*8+offes_intregs)(sp)
-       stw     r20,(20*8+offes_intregs)(sp)
-       stw     r21,(21*8+offes_intregs)(sp)
-       stw     r22,(22*8+offes_intregs)(sp)
-       stw     r23,(23*8+offes_intregs)(sp)
-       stw     r24,(24*8+offes_intregs)(sp)
-       stw     r25,(25*8+offes_intregs)(sp)
-       stw     r26,(26*8+offes_intregs)(sp)
-       stw     r27,(27*8+offes_intregs)(sp)
-       stw     r28,(28*8+offes_intregs)(sp)
-       stw     r29,(29*8+offes_intregs)(sp)
-       stw     r30,(30*8+offes_intregs)(sp)
-       stw     r31,(31*8+offes_intregs)(sp)
+       stw     r0 ,( 0*4+offes_intregs)(sp)
+       stw     r1 ,( 1*4+offes_intregs)(sp)
+       stw     r2 ,( 2*4+offes_intregs)(sp)
+       stw     r3 ,( 3*4+offes_intregs)(sp)
+       stw     r4 ,( 4*4+offes_intregs)(sp)
+       stw     r5 ,( 5*4+offes_intregs)(sp)
+       stw     r6 ,( 6*4+offes_intregs)(sp)
+       stw     r7 ,( 7*4+offes_intregs)(sp)
+       stw     r8 ,( 8*4+offes_intregs)(sp)
+       stw     r9 ,( 9*4+offes_intregs)(sp)
+       stw     r10,(10*4+offes_intregs)(sp)
+       stw     r11,(11*4+offes_intregs)(sp)
+       stw     r12,(12*4+offes_intregs)(sp)
+       stw     r13,(13*4+offes_intregs)(sp)
+       stw     r14,(14*4+offes_intregs)(sp)
+       stw     r15,(15*4+offes_intregs)(sp)
+       stw     r16,(16*4+offes_intregs)(sp) /* link register stored as itmp3 */
+       stw     r17,(17*4+offes_intregs)(sp)
+       stw     r18,(18*4+offes_intregs)(sp)
+       stw     r19,(19*4+offes_intregs)(sp)
+       stw     r20,(20*4+offes_intregs)(sp)
+       stw     r21,(21*4+offes_intregs)(sp)
+       stw     r22,(22*4+offes_intregs)(sp)
+       stw     r23,(23*4+offes_intregs)(sp)
+       stw     r24,(24*4+offes_intregs)(sp)
+       stw     r25,(25*4+offes_intregs)(sp)
+       stw     r26,(26*4+offes_intregs)(sp)
+       stw     r27,(27*4+offes_intregs)(sp)
+       stw     r28,(28*4+offes_intregs)(sp)
+       stw     r29,(29*4+offes_intregs)(sp)
+       stw     r30,(30*4+offes_intregs)(sp)
+       stw     r31,(31*4+offes_intregs)(sp)
        
        stfd    fr0 ,( 0*8+offes_fltregs)(sp)
        stfd    fr1 ,( 1*8+offes_fltregs)(sp)
@@ -967,7 +1224,7 @@ asm_replacement_out:
        stfd    fr31,(31*8+offes_fltregs)(sp)
        
        /* calculate sp of method */
-       addi    itmp1,sp,(sizeexecutionstate + REPLACEMENT_ROOM + 4*4)
+       addi    itmp1,sp,(sizeexecutionstate_ALIGNED + REPLACEMENT_ROOM + 4*4)
        stw     itmp1,(offes_sp)(sp)
 
        /* store pv */
@@ -989,122 +1246,151 @@ asm_replacement_out:
    NOTE: itmp3 is not restored!
 
    C prototype:
-      void asm_replacement_in(executionstate *es);
+      void asm_replacement_in(executionstate *es, replace_safestack_t *st);
 
 *******************************************************************************/
 
 asm_replacement_in:
-       /* a0 == executionstate *es */
+       /* a0 == executionstate *es      */
+       /* a1 == replace_safestack_t *st */
+
+       /* get arguments */
+       mr              s1,a1                       /* replace_safestack_t *st            */
+       mr              s2,a0                       /* executionstate *es == safe stack   */
+
+       /* switch to the safe stack */
+       mr              sp,s2
+
+       /* reserve linkage area */
+       addi    sp,sp,-(LA_SIZE_ALIGNED)
+
+       /* call replace_build_execution_state(st) */
+       mr              a0,s1
+       bl              replace_build_execution_state
+
+       /* set new sp */
+       lwz             sp,(offes_sp)(s2)
+
+       /* build stack frame */
+       addi    sp,sp,-(sizeexecutionstate_ALIGNED)
+
+       /* call replace_free_safestack(st,& of allocated executionstate_t) */
+       mr              a1,sp /* tmpes */
+       mr              a0,s1 /* st    */
+       addi    sp,sp,-(LA_SIZE_ALIGNED)  /* reserve linkage area */
+       bl              replace_free_safestack
+       addi    sp,sp,+(LA_SIZE_ALIGNED)  /* tear down linkage area */
 
-       /* set new sp and pv */
-       lwz     sp,(offes_sp)(a0)
-       lwz     pv,(offes_pv)(a0)
+       /* set new pv */
+       lwz     pv,(offes_pv)(sp)
        
        /* copy registers from execution state */
-       lwz     r0 ,( 0*8+offes_intregs)(a0)
+       lwz     r0 ,( 0*4+offes_intregs)(sp)
        /* r1 is sp                       */
        /* r2 is reserved                 */
-       /* a0 is loaded below             */
-       lwz     r4 ,( 4*8+offes_intregs)(a0)
-       lwz     r5 ,( 5*8+offes_intregs)(a0)
-       lwz     r6 ,( 6*8+offes_intregs)(a0)
-       lwz     r7 ,( 7*8+offes_intregs)(a0)
-       lwz     r8 ,( 8*8+offes_intregs)(a0)
-       lwz     r9 ,( 9*8+offes_intregs)(a0)
-       lwz     r10,(10*8+offes_intregs)(a0)
-       lwz     r11,(11*8+offes_intregs)(a0)
-       lwz     r12,(12*8+offes_intregs)(a0)
+       lwz     a0 ,( 3*4+offes_intregs)(sp)
+       lwz     r4 ,( 4*4+offes_intregs)(sp)
+       lwz     r5 ,( 5*4+offes_intregs)(sp)
+       lwz     r6 ,( 6*4+offes_intregs)(sp)
+       lwz     r7 ,( 7*4+offes_intregs)(sp)
+       lwz     r8 ,( 8*4+offes_intregs)(sp)
+       lwz     r9 ,( 9*4+offes_intregs)(sp)
+       lwz     r10,(10*4+offes_intregs)(sp)
+       lwz     r11,(11*4+offes_intregs)(sp)
+       lwz     r12,(12*4+offes_intregs)(sp)
        /* r13 is pv                      */
-       lwz     r14,(14*8+offes_intregs)(a0)
-       lwz     r15,(15*8+offes_intregs)(a0)
-       lwz     r16,(16*8+offes_intregs)(a0) /* link register */
-       lwz     r17,(17*8+offes_intregs)(a0)
-       lwz     r18,(18*8+offes_intregs)(a0)
-       lwz     r19,(19*8+offes_intregs)(a0)
-       lwz     r20,(20*8+offes_intregs)(a0)
-       lwz     r21,(21*8+offes_intregs)(a0)
-       lwz     r22,(22*8+offes_intregs)(a0)
-       lwz     r23,(23*8+offes_intregs)(a0)
-       lwz     r24,(24*8+offes_intregs)(a0)
-       lwz     r25,(25*8+offes_intregs)(a0)
-       lwz     r26,(26*8+offes_intregs)(a0)
-       lwz     r27,(27*8+offes_intregs)(a0)
-       lwz     r28,(28*8+offes_intregs)(a0)
-       lwz     r29,(29*8+offes_intregs)(a0)
-       lwz     r30,(30*8+offes_intregs)(a0)
-       lwz     r31,(31*8+offes_intregs)(a0)
+       lwz     r14,(14*4+offes_intregs)(sp)
+       lwz     r15,(15*4+offes_intregs)(sp)
+       lwz     r16,(16*4+offes_intregs)(sp) /* itmp3, later to link register */
+       lwz     r17,(17*4+offes_intregs)(sp)
+       lwz     r18,(18*4+offes_intregs)(sp)
+       lwz     r19,(19*4+offes_intregs)(sp)
+       lwz     r20,(20*4+offes_intregs)(sp)
+       lwz     r21,(21*4+offes_intregs)(sp)
+       lwz     r22,(22*4+offes_intregs)(sp)
+       lwz     r23,(23*4+offes_intregs)(sp)
+       lwz     r24,(24*4+offes_intregs)(sp)
+       lwz     r25,(25*4+offes_intregs)(sp)
+       lwz     r26,(26*4+offes_intregs)(sp)
+       lwz     r27,(27*4+offes_intregs)(sp)
+       lwz     r28,(28*4+offes_intregs)(sp)
+       lwz     r29,(29*4+offes_intregs)(sp)
+       lwz     r30,(30*4+offes_intregs)(sp)
+       lwz     r31,(31*4+offes_intregs)(sp)
        
-       lfd     fr0 ,( 0*8+offes_fltregs)(a0)
-       lfd     fr1 ,( 1*8+offes_fltregs)(a0)
-       lfd     fr2 ,( 2*8+offes_fltregs)(a0)
-       lfd     fr3 ,( 3*8+offes_fltregs)(a0)
-       lfd     fr4 ,( 4*8+offes_fltregs)(a0)
-       lfd     fr5 ,( 5*8+offes_fltregs)(a0)
-       lfd     fr6 ,( 6*8+offes_fltregs)(a0)
-       lfd     fr7 ,( 7*8+offes_fltregs)(a0)
-       lfd     fr8 ,( 8*8+offes_fltregs)(a0)
-       lfd     fr9 ,( 9*8+offes_fltregs)(a0)
-       lfd     fr10,(10*8+offes_fltregs)(a0)
-       lfd     fr11,(11*8+offes_fltregs)(a0)
-       lfd     fr12,(12*8+offes_fltregs)(a0)
-       lfd     fr13,(13*8+offes_fltregs)(a0)
-       lfd     fr14,(14*8+offes_fltregs)(a0)
-       lfd     fr15,(15*8+offes_fltregs)(a0)
-       lfd     fr16,(16*8+offes_fltregs)(a0)
-       lfd     fr17,(17*8+offes_fltregs)(a0)
-       lfd     fr18,(18*8+offes_fltregs)(a0)
-       lfd     fr19,(19*8+offes_fltregs)(a0)
-       lfd     fr20,(20*8+offes_fltregs)(a0)
-       lfd     fr21,(21*8+offes_fltregs)(a0)
-       lfd     fr22,(22*8+offes_fltregs)(a0)
-       lfd     fr23,(23*8+offes_fltregs)(a0)
-       lfd     fr24,(24*8+offes_fltregs)(a0)
-       lfd     fr25,(25*8+offes_fltregs)(a0)
-       lfd     fr26,(26*8+offes_fltregs)(a0)
-       lfd     fr27,(27*8+offes_fltregs)(a0)
-       lfd     fr28,(28*8+offes_fltregs)(a0)
-       lfd     fr29,(29*8+offes_fltregs)(a0)
-       lfd     fr30,(30*8+offes_fltregs)(a0)
-       lfd     fr31,(31*8+offes_fltregs)(a0)
+       lfd     fr0 ,( 0*8+offes_fltregs)(sp)
+       lfd     fr1 ,( 1*8+offes_fltregs)(sp)
+       lfd     fr2 ,( 2*8+offes_fltregs)(sp)
+       lfd     fr3 ,( 3*8+offes_fltregs)(sp)
+       lfd     fr4 ,( 4*8+offes_fltregs)(sp)
+       lfd     fr5 ,( 5*8+offes_fltregs)(sp)
+       lfd     fr6 ,( 6*8+offes_fltregs)(sp)
+       lfd     fr7 ,( 7*8+offes_fltregs)(sp)
+       lfd     fr8 ,( 8*8+offes_fltregs)(sp)
+       lfd     fr9 ,( 9*8+offes_fltregs)(sp)
+       lfd     fr10,(10*8+offes_fltregs)(sp)
+       lfd     fr11,(11*8+offes_fltregs)(sp)
+       lfd     fr12,(12*8+offes_fltregs)(sp)
+       lfd     fr13,(13*8+offes_fltregs)(sp)
+       lfd     fr14,(14*8+offes_fltregs)(sp)
+       lfd     fr15,(15*8+offes_fltregs)(sp)
+       lfd     fr16,(16*8+offes_fltregs)(sp)
+       lfd     fr17,(17*8+offes_fltregs)(sp)
+       lfd     fr18,(18*8+offes_fltregs)(sp)
+       lfd     fr19,(19*8+offes_fltregs)(sp)
+       lfd     fr20,(20*8+offes_fltregs)(sp)
+       lfd     fr21,(21*8+offes_fltregs)(sp)
+       lfd     fr22,(22*8+offes_fltregs)(sp)
+       lfd     fr23,(23*8+offes_fltregs)(sp)
+       lfd     fr24,(24*8+offes_fltregs)(sp)
+       lfd     fr25,(25*8+offes_fltregs)(sp)
+       lfd     fr26,(26*8+offes_fltregs)(sp)
+       lfd     fr27,(27*8+offes_fltregs)(sp)
+       lfd     fr28,(28*8+offes_fltregs)(sp)
+       lfd     fr29,(29*8+offes_fltregs)(sp)
+       lfd     fr30,(30*8+offes_fltregs)(sp)
+       lfd     fr31,(31*8+offes_fltregs)(sp)
 
        /* restore link register */
 
-       mtlr    r16
+       mtlr    itmp3
        
        /* load new pc */
 
-       lwz     itmp3,offes_pc(a0)
+       lwz     itmp3,offes_pc(sp)
 
-       /* load a0 */
-       
-       lwz     a0,(3*8+offes_intregs)(a0)
+       /* remove stack frame */
+
+       addi    sp,sp,+(sizeexecutionstate_ALIGNED)
 
        /* jump to new code */
 
        mtctr   itmp3
        bctr
 
+#endif /* defined(ENABLE_REPLACEMENT) */
+
 /*********************************************************************/
 
 asm_cacheflush:
-       add     r4,r3,r4
-       rlwinm  r3,r3,0,0,26
-       addi    r4,r4,31
-       rlwinm  r4,r4,0,0,26
-       mr      r5,r3
+       add     a1,a0,a1
+       rlwinm  a0,a0,0,0,26
+       addi    a1,a1,31
+       rlwinm  a1,a1,0,0,26
+       mr      a2,a0
 1:
-       cmplw   r3,r4
+       cmplw   a0,a1
        bge     0f
-       dcbst   0,r3
-       addi    r3,r3,32
+       dcbst   0,a0
+       addi    a0,a0,32
        b       1b
 0:
        sync
 1:
-       cmplw   r5,r4
+       cmplw   a2,a1
        bge     0f
-       icbi    0,r5
-       addi    r5,r5,32
+       icbi    0,a2
+       addi    a2,a2,32
        b       1b
 0:
        sync
@@ -1112,196 +1398,51 @@ asm_cacheflush:
        blr
 
 
-       .align 3
-doublezero:
-       .double 0.0
-
-asm_initialize_thread_stack:
-       addi r4,r4,-256
-       stw r3,120(r4)
-       li r3,0
-       stw r3,124(r4)
-       stw r3,0(r4)
-       stw r3,4(r4)
-       stw r3,8(r4)
-       stw r3,12(r4)
-       stw r3,16(r4)
-       stw r3,20(r4)
-       stw r3,24(r4)
-       stw r3,28(r4)
-       stw r3,32(r4)
-       stw r3,36(r4)
-
-       stw r3,128(r4)
-       stw r3,132(r4)
-       stw r3,136(r4)
-       stw r3,140(r4)
-       stw r3,144(r4)
-       stw r3,148(r4)
-       stw r3,152(r4)
-       stw r3,156(r4)
+/* asm_compare_and_swap ********************************************************
 
-       mflr r0
-       bl 0f
-0:
-       mflr r3
-       mtlr r0
-#if defined(__DARWIN__)
-       lfd fr0,lo16(doublezero-0b)(r3)
-#else
-       lfd fr0,(doublezero-0b)@l(r3)
-#endif
+   Atomic compare-and-swap on a 32-bit word using an lwarx/stwcx.
+   reservation loop: if *address (a0) equals the expected old value (a1),
+   store the new value (a2); the previous contents of the word are
+   returned in a0 either way.
 
-       stfd fr0,40(r4)
-       stfd fr0,48(r4)
-       stfd fr0,56(r4)
-       stfd fr0,64(r4)
-       stfd fr0,72(r4)
-       stfd fr0,80(r4)
-       stfd fr0,88(r4)
-       stfd fr0,96(r4)
-       stfd fr0,104(r4)
-       stfd fr0,112(r4)
-
-       stfd fr0,160(r4)
-       stfd fr0,168(r4)
-       stfd fr0,176(r4)
-       stfd fr0,184(r4)
-       stfd fr0,192(r4)
-       stfd fr0,200(r4)
-       stfd fr0,208(r4)
-       stfd fr0,216(r4)
-
-       mr r3,r4
-       blr
+*******************************************************************************/
 
+asm_compare_and_swap:
+1:  lwarx   a6,r0,a0 
+       subf.   r0,a6,a1 
+       bne-    2f 
+       or      r0,a2,a2 
+       stwcx.  r0,r0,a0 
+       bne-    1b 
+2: 
+       mr      a0,a6
+       blr
 
-asm_perform_threadswitch:
-       mflr r0
-       addi r1,r1,-224
-       stw r0,120(r1)
-       stw pv,124(r1)
-       stw r14,0(r1)
-       stw r15,4(r1)
-       stw r24,8(r1)
-       stw r25,12(r1)
-       stw r26,16(r1)
-       stw r27,20(r1)
-       stw r28,24(r1)
-       stw r29,28(r1)
-       stw r30,32(r1)
-       stw r31,36(r1)
-       stfd fr14,40(r1)
-       stfd fr15,48(r1)
-       stfd fr24,56(r1)
-       stfd fr25,64(r1)
-       stfd fr26,72(r1)
-       stfd fr27,80(r1)
-       stfd fr28,88(r1)
-       stfd fr29,96(r1)
-       stfd fr30,104(r1)
-       stfd fr31,112(r1)
-
-       stw r16,128(r1)
-       stw r17,132(r1)
-       stw r18,136(r1)
-       stw r19,140(r1)
-       stw r20,144(r1)
-       stw r21,148(r1)
-       stw r22,152(r1)
-       stw r23,156(r1)
-       stfd fr16,160(r1)
-       stfd fr17,168(r1)
-       stfd fr18,176(r1)
-       stfd fr19,184(r1)
-       stfd fr20,192(r1)
-       stfd fr21,200(r1)
-       stfd fr22,208(r1)
-       stfd fr23,216(r1)
-
-       stw r1,0(r3)
-       stw r1,0(r5)
-       lwz r1,0(r4)
-
-       lwz r0,120(r1)
-       lwz pv,124(r1)
-       lwz r14,0(r1)
-       lwz r15,4(r1)
-       lwz r24,8(r1)
-       lwz r25,12(r1)
-       lwz r26,16(r1)
-       lwz r27,20(r1)
-       lwz r28,24(r1)
-       lwz r29,28(r1)
-       lwz r30,32(r1)
-       lwz r31,36(r1)
-       lfd fr14,40(r1)
-       lfd fr15,48(r1)
-       lfd fr24,56(r1)
-       lfd fr25,64(r1)
-       lfd fr26,72(r1)
-       lfd fr27,80(r1)
-       lfd fr28,88(r1)
-       lfd fr29,96(r1)
-       lfd fr30,104(r1)
-       lfd fr31,112(r1)
-
-       lwz r16,128(r1)
-       lwz r17,132(r1)
-       lwz r18,136(r1)
-       lwz r19,140(r1)
-       lwz r20,144(r1)
-       lwz r21,148(r1)
-       lwz r22,152(r1)
-       lwz r23,156(r1)
-       lfd fr16,160(r1)
-       lfd fr17,168(r1)
-       lfd fr18,176(r1)
-       lfd fr19,184(r1)
-       lfd fr20,192(r1)
-       lfd fr21,200(r1)
-       lfd fr22,208(r1)
-       lfd fr23,216(r1)
 
-       mtlr r0
-       addi r1,r1,224
-       blr
+/* asm_memory_barrier **********************************************************
 
+   Full memory barrier: executes a PowerPC `sync`, ordering all preceding
+   loads and stores before any subsequent memory accesses.
 
-asm_switchstackandcall:
-       mflr r0
-       stwu r3,-48(r3)
-       stw r0,40(r3)
-       stw r1,44(r3)
-       stw r1,0(r5)
-       mr r1,r3
-
-       mtctr r4
-       mr r3,r6
-       bctrl
+*******************************************************************************/
 
-       lwz r0,40(r1)
-       mtlr r0
-       lwz r1,44(r1)
+asm_memory_barrier:
+       sync
        blr
 
 
 asm_getclassvalues_atomic:
 _crit_restart:
 _crit_begin:
-       lwz     r6,offbaseval(r3)
-       lwz     r7,offdiffval(r3)
-       lwz     r8,offbaseval(r4)
+       lwz     a3,offbaseval(a0)
+       lwz     a4,offdiffval(a0)
+       lwz     a5,offbaseval(a1)
 _crit_end:
-       stw     r6,offcast_super_baseval(r5)
-       stw     r7,offcast_super_diffval(r5)
-       stw     r8,offcast_sub_baseval(r5)
+       stw     a3,offcast_super_baseval(a2)
+       stw     a4,offcast_super_diffval(a2)
+       stw     a5,offcast_sub_baseval(a2)
        blr
 
        .data
 
 asm_criticalsections:
-#if defined(USE_THREADS) && defined(NATIVE_THREADS)
+#if defined(ENABLE_THREADS)
        .long   _crit_begin
        .long   _crit_end
        .long   _crit_restart
@@ -1331,6 +1472,26 @@ L_builtin_throw_exception$lazy_ptr:
        .long dyld_stub_binding_helper
 
 
+.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+       .align 2
+L_md_codegen_get_pv_from_pc$stub:
+       .indirect_symbol _md_codegen_get_pv_from_pc
+       mflr r0
+       bcl 20,31,L00$_md_codegen_get_pv_from_pc
+L00$_md_codegen_get_pv_from_pc:
+       mflr r11
+       addis r11,r11,ha16(L_md_codegen_get_pv_from_pc$lazy_ptr - L00$_md_codegen_get_pv_from_pc)
+       mtlr r0
+       lwzu r12,lo16(L_md_codegen_get_pv_from_pc$lazy_ptr - L00$_md_codegen_get_pv_from_pc)(r11)
+       mtctr r12
+       bctr
+.data
+.lazy_symbol_pointer
+L_md_codegen_get_pv_from_pc$lazy_ptr:
+       .indirect_symbol _md_codegen_get_pv_from_pc
+       .long dyld_stub_binding_helper
+
+
 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
        .align 2
 L_exceptions_handle_exception$stub:
@@ -1373,21 +1534,21 @@ L_stacktrace_create_extern_stackframeinfo$lazy_ptr:
 
 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
        .align 2
-L_jit_compile$stub:
-       .indirect_symbol _jit_compile
+L_jit_asm_compile$stub:
+       .indirect_symbol _jit_asm_compile
        mflr r0
-       bcl 20,31,L00$_jit_compile
-L00$_jit_compile:
+       bcl 20,31,L00$_jit_asm_compile
+L00$_jit_asm_compile:
        mflr r11
-       addis r11,r11,ha16(L_jit_compile$lazy_ptr - L00$_jit_compile)
+       addis r11,r11,ha16(L_jit_asm_compile$lazy_ptr - L00$_jit_asm_compile)
        mtlr r0
-       lwzu r12,lo16(L_jit_compile$lazy_ptr - L00$_jit_compile)(r11)
+       lwzu r12,lo16(L_jit_asm_compile$lazy_ptr - L00$_jit_asm_compile)(r11)
        mtctr r12
        bctr
 .data
 .lazy_symbol_pointer
-L_jit_compile$lazy_ptr:
-       .indirect_symbol _jit_compile
+L_jit_asm_compile$lazy_ptr:
+       .indirect_symbol _jit_asm_compile
        .long dyld_stub_binding_helper
 
 
@@ -1413,21 +1574,61 @@ L_stacktrace_remove_stackframeinfo$lazy_ptr:
 
 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
        .align 2
-L_builtin_asm_get_exceptionptrptr$stub:
-       .indirect_symbol _builtin_asm_get_exceptionptrptr
+L_exceptions_get_and_clear_exception$stub:
+       .indirect_symbol _exceptions_get_and_clear_exception
+       mflr r0
+       bcl 20,31,L00$_exceptions_get_and_clear_exception
+L00$_exceptions_get_and_clear_exception:
+       mflr r11
+       addis r11,r11,ha16(L_exceptions_get_and_clear_exception$lazy_ptr - L00$_exceptions_get_and_clear_exception)
+       mtlr r0
+       lwzu r12,lo16(L_exceptions_get_and_clear_exception$lazy_ptr - L00$_exceptions_get_and_clear_exception)(r11)
+       mtctr r12
+       bctr
+.data
+.lazy_symbol_pointer
+L_exceptions_get_and_clear_exception$lazy_ptr:
+       .indirect_symbol _exceptions_get_and_clear_exception
+       .long dyld_stub_binding_helper
+
+
+.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+       .align 2
+L_exceptions_asm_new_abstractmethoderror$stub:
+       .indirect_symbol _exceptions_asm_new_abstractmethoderror
        mflr r0
-       bcl 20,31,L00$_builtin_asm_get_exceptionptrptr
-L00$_builtin_asm_get_exceptionptrptr:
+       bcl 20,31,L00$_exceptions_asm_new_abstractmethoderror
+L00$_exceptions_asm_new_abstractmethoderror:
        mflr r11
-       addis r11,r11,ha16(L_builtin_asm_get_exceptionptrptr$lazy_ptr - L00$_builtin_asm_get_exceptionptrptr)
+       addis r11,r11,ha16(L_exceptions_asm_new_abstractmethoderror$lazy_ptr - L00$_exceptions_asm_new_abstractmethoderror)
        mtlr r0
-       lwzu r12,lo16(L_builtin_asm_get_exceptionptrptr$lazy_ptr - L00$_builtin_asm_get_exceptionptrptr)(r11)
+       lwzu r12,lo16(L_exceptions_asm_new_abstractmethoderror$lazy_ptr - L00$_exceptions_asm_new_abstractmethoderror)(r11)
        mtctr r12
        bctr
 .data
 .lazy_symbol_pointer
-L_builtin_asm_get_exceptionptrptr$lazy_ptr:
-       .indirect_symbol _builtin_asm_get_exceptionptrptr
+L_exceptions_asm_new_abstractmethoderror$lazy_ptr:
+       .indirect_symbol _exceptions_asm_new_abstractmethoderror
+       .long dyld_stub_binding_helper
+
+
+.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+       .align 2
+L_patcher_wrapper$stub:
+       .indirect_symbol _patcher_wrapper
+       mflr r0
+       bcl 20,31,L00$_patcher_wrapper
+L00$_patcher_wrapper:
+       mflr r11
+       addis r11,r11,ha16(L_patcher_wrapper$lazy_ptr - L00$_patcher_wrapper)
+       mtlr r0
+       lwzu r12,lo16(L_patcher_wrapper$lazy_ptr - L00$_patcher_wrapper)(r11)
+       mtctr r12
+       bctr
+.data
+.lazy_symbol_pointer
+L_patcher_wrapper$lazy_ptr:
+       .indirect_symbol _patcher_wrapper
        .long dyld_stub_binding_helper
 
 
@@ -1450,6 +1651,46 @@ L_replace_me$lazy_ptr:
        .indirect_symbol _replace_me
        .long dyld_stub_binding_helper
 
+
+.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+       .align 2
+L_replace_build_execution_state$stub:
+       .indirect_symbol _replace_build_execution_state
+       mflr r0
+       bcl 20,31,L00$_replace_build_execution_state
+L00$_replace_build_execution_state:
+       mflr r11
+       addis r11,r11,ha16(L_replace_build_execution_state$lazy_ptr - L00$_replace_build_execution_state)
+       mtlr r0
+       lwzu r12,lo16(L_replace_build_execution_state$lazy_ptr - L00$_replace_build_execution_state)(r11)
+       mtctr r12
+       bctr
+.data
+.lazy_symbol_pointer
+L_replace_build_execution_state$lazy_ptr:
+       .indirect_symbol _replace_build_execution_state
+       .long dyld_stub_binding_helper
+
+
+.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+       .align 2
+L_replace_free_safestack$stub:
+       .indirect_symbol _replace_free_safestack
+       mflr r0
+       bcl 20,31,L00$_replace_free_safestack
+L00$_replace_free_safestack:
+       mflr r11
+       addis r11,r11,ha16(L_replace_free_safestack$lazy_ptr - L00$_replace_free_safestack)
+       mtlr r0
+       lwzu r12,lo16(L_replace_free_safestack$lazy_ptr - L00$_replace_free_safestack)(r11)
+       mtctr r12
+       bctr
+.data
+.lazy_symbol_pointer
+L_replace_free_safestack$lazy_ptr:
+       .indirect_symbol _replace_free_safestack
+       .long dyld_stub_binding_helper
+
 #endif /* defined(__DARWIN__) */