Merged revisions 7501-7598 via svnmerge from
[cacao.git] / src / vm / jit / powerpc64 / asmpart.S
index f0572d567543267da84a30354a03c1eb08662cf1..49961cd1b47a7a273025fd9ba331d2bda83677b2 100644 (file)
 
    Contact: cacao@cacaojvm.org
 
-   Authors: Andreas Krall
-            Reinhard Grafl
-            Stefan Ring
+   Authors: Roland Lezuo
 
    Changes: Christian Thalinger
             Edwin Steiner
 
-   $Id: asmpart.S 5213 2006-08-07 15:12:20Z tbfg $
+   $Id: asmpart.S 7596 2007-03-28 21:05:53Z twisti $
 
 */
 
 
 #include "config.h"
 
+#define __ASSEMBLY__
+
 #include "md-abi.h"
 #include "md-asm.h"
 
 
 /* export functions ***********************************************************/
 
-       .globl asm_vm_call_method
-       .globl .asm_vm_call_method_int
-       .globl .asm_vm_call_method_long
-       .globl .asm_vm_call_method_float
-       .globl .asm_vm_call_method_double
-
        .globl asm_vm_call_method_exception_handler
+       .globl asm_vm_call_method_end
 
        .globl asm_call_jit_compiler
 
 
        .globl asm_patcher_wrapper
 
+#if defined(ENABLE_REPLACEMENT)
        .globl asm_replacement_out
        .globl .asm_replacement_in
+#endif
 
-       .globl .asm_cacheflush          /* no function descriptor needed, only called direct */
+       .globl asm_cacheflush
        .globl asm_criticalsections
        .globl asm_getclassvalues_atomic
 
 *         void *arg1, void *arg2, void *arg3, void *arg4);                     *
 *                                                                              *
 *******************************************************************************/
-
-       .align 2
-
-       .long   0                         /* catch type all                       */
-       .long   0                         /* exception handler pc                 */
-       .long   0                         /* end pc                               */
-       .long   0                         /* start pc                             */
-       .long   1                         /* extable size                         */
-       .long   0                         /* line number table start              */
-       .long   0                         /* line number table size               */
-       .long   0                         /* fltsave                              */
-       .long   0                         /* intsave                              */
-       .long   0                         /* isleaf                               */
-       .long   0                         /* IsSync                               */
-       .long   0                         /* frame size                           */
-       .long   0                         /* codeinfo pointer                     */
-
-.section ".opd","aw"
-.align 3
-
-asm_vm_call_method:
-       .quad   .asm_vm_call_method,.TOC.@tocbase,0
-       .previous
-       .size asm_vm_call_method, 24
-       .type .asm_vm_call_method,@function
-       .globl .asm_vm_call_method
+       /* this is the method header see src/vm/jit/methodheader.h */
+
+       .align  8
+
+       .quad   0                           /* catch type all                     */
+       .quad   0                           /* handler pc                         */
+       .quad   0                           /* end pc                             */
+       .quad   0                           /* start pc                           */
+       .long   1                           /* extable size                       */
+       .long   0                           /* ALIGNMENT PADDING                  */
+       .quad   0                           /* line number table  start           */
+       .quad   0                           /* line number table  size            */
+       .long   0                           /* ALIGNMENT PADDING                  */
+       .long   0                           /* fltsave                            */
+       .long   0                           /* intsave                            */
+       .long   0                           /* isleaf                             */
+       .long   0                           /* IsSync                             */
+       .long   0                           /* frame size                         */
+       .quad   0                           /* codeinfo pointer                   */
+
+#ifdef ENABLE_LIBJVM
+       
+       .globl asm_vm_call_method
+       .globl asm_vm_call_method_int
+       .globl asm_vm_call_method_long
+       .globl asm_vm_call_method_float
+       .globl asm_vm_call_method_double
+       .section ".opd","aw"
+       .align 3
+
+       asm_vm_call_method:
+       asm_vm_call_method_int:
+       asm_vm_call_method_long:
+       asm_vm_call_method_float:
+       asm_vm_call_method_double:
+               .quad   .asm_vm_call_method,.TOC.@tocbase,0
+               .previous
+               .size asm_vm_call_method, 24
+               .type .asm_vm_call_method,@function
+               .globl .asm_vm_call_method
+#else
+       asm_vm_call_method:
+       .globl asm_vm_call_method
+       asm_vm_call_method_int:
+       .globl asm_vm_call_method_int
+       asm_vm_call_method_long:
+       .globl asm_vm_call_method_long
+       asm_vm_call_method_float:
+       .globl asm_vm_call_method_float
+       asm_vm_call_method_double:
+       .globl asm_vm_call_method_double
+#endif
 
 .asm_vm_call_method:
 .asm_vm_call_method_int:
@@ -124,38 +147,38 @@ asm_vm_call_method:
 .asm_vm_call_method_float:
 .asm_vm_call_method_double:
        mflr    r0
-       stw     r0,LA_LR_OFFSET(r1)
-       stwu    r1,-40*4(r1)
-
-       stw     s0,8*4(sp)                /* save used callee saved registers     */
-       stw     a0,9*4(sp)                /* save method pointer for compiler     */
+       std     r0,LA_LR_OFFSET(sp)
+       stdu    sp,-40*8(sp)
+       
+       std     s0,8*8(sp)                /* save used callee saved registers     */
+       std     a0,9*8(sp)                /* save method pointer for compiler     */
 
 #if defined(__DARWIN__)
-       stw     itmp1,10*4(sp)            /* register r11 is callee saved         */
+       std     itmp1,10*8(sp)            /* register r11 is callee saved         */
 #endif
-       stw     pv,11*4(sp)               /* save PV register                     */
+       std     pv,11*8(sp)               /* save PV register                     */
 
-       stw     itmp3,12*4(sp)            /* registers r14-r31 are callee saved   */
-       stfd    ftmp1,14*4(sp)            /* registers f14-f31 are callee saved   */
-       stfd    ftmp2,16*4(sp)
+       std     itmp3,12*8(sp)            /* registers r14-r31 are callee saved   */
+       stfd    ftmp1,13*8(sp)            /* registers f14-f31 are callee saved   */
+       stfd    ftmp2,14*8(sp)
 
 #if defined(__DARWIN__)
-       stw     t1,18*4(r1)
-       stw     t2,19*4(r1)
-       stw     t3,20*4(r1)
-       stw     t4,21*4(r1)
-       stw     t5,22*4(r1)
-       stw     t6,23*4(r1)
-       stw     t7,24*4(r1)
-
-       stfd    ft0,26*4(r1)
-       stfd    ft1,28*4(r1)
-       stfd    ft2,30*4(r1)
-       stfd    ft3,32*4(r1)
-       stfd    ft4,34*4(r1)
-       stfd    ft5,36*4(r1)
+       std     t1,15*8(r1)
+       std     t2,16*8(r1)
+       std     t3,17*8(r1)
+       std     t4,18*8(r1)
+       std     t5,19*8(r1)
+       std     t6,20*8(r1)
+       std     t7,21*8(r1)
+
+       stfd    ft0,22*8(r1)
+       stfd    ft1,23*8(r1)
+       stfd    ft2,24*8(r1)
+       stfd    ft3,25*8(r1)
+       stfd    ft4,26*8(r1)
+       stfd    ft5,27*8(r1)
 #else
-       SAVE_TEMPORARY_REGISTERS(18)      /* the offset has to be even            */
+       SAVE_TEMPORARY_REGISTERS(15)      /* the offset has to be even            */
 #endif
 
        mr      itmp2,a1                  /* arg count                            */
@@ -171,15 +194,7 @@ asm_vm_call_method:
        li      t0,0                      /* initialize integer argument counter  */
        li      t1,0                      /* initialize float argument counter    */
        li      t6,0                      /* initialize integer register counter  */
-#if defined(__DARWIN__)
-       li      t7,0                      /* initialize stack slot counter        */
-#endif
-
-       mflr    r0                        /* save link register (PIC code)        */
-       bl      L_asm_vm_call_method_get_pc
-L_asm_vm_call_method_get_pc:
-       mflr    t3                        /* t3 contains the current pc           */
-       mtlr    r0
+       li      t3,8                      /* initialize PA counter                */
 
 L_register_copy:
        addi    itmp1,itmp1,sizevmarg     /* goto next argument block             */
@@ -187,6 +202,7 @@ L_register_copy:
        mr.     itmp2,itmp2
        beq     L_register_copy_done
 
+       addi    t3,t3,-1                  /* uses a PA slot                       */
        lwz     itmp3,offvmargtype+4(itmp1)
        andi.   r0,itmp3,0x0002           /* is this a float/double type?         */
        bne     L_register_handle_float
@@ -199,53 +215,35 @@ L_register_handle_int:
        bne     L_register_handle_long
 
 #if defined(__DARWIN__)
-       addis   itmp3,t3,ha16(L_jumptable_int - L_asm_vm_call_method_get_pc)
-       la      itmp3,lo16(L_jumptable_int - L_asm_vm_call_method_get_pc)(itmp3)
+       #error "FIXME for darwin"
 #else
-       lis     itmp3,L_jumptable_int@ha
-       addi    itmp3,itmp3,L_jumptable_int@l
-#endif
-
-       slwi    t2,t6,2                   /* multiple of 4-bytes                  */
-       add     itmp3,itmp3,t2            /* calculate address of jumptable       */
-       lwz     itmp3,0(itmp3)            /* load function address                */
-       mtctr   itmp3
-       addi    t0,t0,1                   /* integer argument counter             */
-       addi    t6,t6,1                   /* integer argument register counter    */
-#if defined(__DARWIN__)
-       addi    t7,t7,1                   /* stack slot counter                   */
+       lis     itmp3,L_jumptable_int@highest           /* load 64bit address   */
+       ori     itmp3,itmp3,L_jumptable_int@higher
+       rldicr  itmp3,itmp3,32,31
+       oris    itmp3,itmp3,L_jumptable_int@h
+       ori     itmp3,itmp3,L_jumptable_int@l
 #endif
-       bctr
+       b       L_register_do_copy_longint
 
 L_register_handle_long:
+
 #if defined(__DARWIN__)
-       addis   itmp3,t3,ha16(L_jumptable_long - L_asm_vm_call_method_get_pc)
-       la      itmp3,lo16(L_jumptable_long - L_asm_vm_call_method_get_pc)(itmp3)
+       #error "FIXME for darwin"
 #else
-       lis     itmp3,L_jumptable_long@ha
-       addi    itmp3,itmp3,L_jumptable_long@l
-#endif
-#if !defined(__DARWIN__)
-       addi    t6,t6,1                   /* align to even numbers                */
-       andi.   t6,t6,0xfffe
+       lis     itmp3,L_jumptable_long@highest          /* load 64bit address   */
+       ori     itmp3,itmp3,L_jumptable_long@higher
+       rldicr  itmp3,itmp3,32,31
+       oris    itmp3,itmp3,L_jumptable_long@h
+       ori     itmp3,itmp3,L_jumptable_long@l
 #endif
 
-       cmpwi   t6,(INT_ARG_CNT - 1)      /* are we out of integer argument       */
-       blt     L_register_handle_long_continue /* registers?                     */
-
-       li      t6,INT_ARG_CNT            /* yes, set integer argument register   */
-       b       L_register_copy           /* count to max and next loop           */
-
-L_register_handle_long_continue:
-       slwi    t2,t6,2                   /* multiple of 4-bytes                  */
+L_register_do_copy_longint:
+       slwi    t2,t6,3                   /* multiple of 8-bytes                  */
        add     itmp3,itmp3,t2            /* calculate address of jumptable       */
-       lwz     itmp3,0(itmp3)            /* load function address                */
+       ld      itmp3,0(itmp3)            /* load function address                */
        mtctr   itmp3
        addi    t0,t0,1                   /* integer argument counter             */
-       addi    t6,t6,2                   /* integer argument register counter    */
-#if defined(__DARWIN__)
-       addi    t7,t7,2                   /* stack slot counter                   */
-#endif
+       addi    t6,t6,1                   /* integer argument register counter    */
        bctr
 
 L_register_handle_float:
@@ -256,45 +254,45 @@ L_register_handle_float:
        bne     L_register_handle_double
 
 #if defined(__DARWIN__)
-       addis   itmp3,t3,ha16(L_jumptable_float - L_asm_vm_call_method_get_pc)
-       la      itmp3,lo16(L_jumptable_float - L_asm_vm_call_method_get_pc)(itmp3)
+       #error "FIXME for darwin"
 #else
-       lis     itmp3,L_jumptable_float@ha
-       addi    itmp3,itmp3,L_jumptable_float@l
-#endif
-
-       slwi    t2,t1,2                   /* multiple of 4-bytes                  */
-       add     itmp3,itmp3,t2            /* calculate address of jumptable       */
-       lwz     itmp3,0(itmp3)            /* load function address                */
-       mtctr   itmp3
-       addi    t1,t1,1                   /* float argument counter               */
-#if defined(__DARWIN__)
-       addi    t7,t7,1                   /* stack slot counter                   */
-       addi    t6,t6,1                   /* skip 1 integer argument register     */
+       lis     itmp3,L_jumptable_float@highest         /* load 64bit address   */
+       ori     itmp3,itmp3,L_jumptable_float@higher
+       rldicr  itmp3,itmp3,32,31
+       oris    itmp3,itmp3,L_jumptable_float@h
+       ori     itmp3,itmp3,L_jumptable_float@l
 #endif
-       bctr
+       b L_register_do_copy_floatdouble
 
 L_register_handle_double:
+
 #if defined(__DARWIN__)
-       addis   itmp3,t3,ha16(L_jumptable_double - L_asm_vm_call_method_get_pc)
-       la      itmp3,lo16(L_jumptable_double - L_asm_vm_call_method_get_pc)(itmp3)
+       #error "FIXME for darwin"
 #else
-       lis     itmp3,L_jumptable_double@ha
-       addi    itmp3,itmp3,L_jumptable_double@l
+       lis     itmp3,L_jumptable_double@highest                /* load 64bit address   */
+       ori     itmp3,itmp3,L_jumptable_double@higher
+       rldicr  itmp3,itmp3,32,31
+       oris    itmp3,itmp3,L_jumptable_double@h
+       ori     itmp3,itmp3,L_jumptable_double@l
 #endif
 
-       slwi    t2,t1,2                   /* multiple of 4-bytes                  */
+
+L_register_do_copy_floatdouble:
+
+       slwi    t2,t1,3                   /* multiple of 8-bytes                  */
        add     itmp3,itmp3,t2            /* calculate address of jumptable       */
-       lwz     itmp3,0(itmp3)            /* load function address                */
+       ld      itmp3,0(itmp3)            /* load function address                */
        mtctr   itmp3
        addi    t1,t1,1                   /* float argument counter               */
-#if defined(__DARWIN__)
-       addi    t7,t7,2                   /* stack slot counter                   */
-       addi    t6,t6,2                   /* skip 2 integer argument registers    */
-#endif
+       mr.     t3,t3                     /* are we still in PA ?                 */
+       blt     L_float_not_uses_PA 
+       addi    t6,t6,1                   /* if so it uses an interger arg reg    */
+L_float_not_uses_PA:
        bctr
 
+
 L_register_copy_done:
+       subi    sp,sp,PA_SIZE             /* PA_SIZE are used by definition       */
                                          /* calculate remaining arguments        */
        sub     itmp3,t4,t0               /* - integer arguments in registers     */
        sub     itmp3,itmp3,t1            /* - float arguments in registers       */
@@ -306,29 +304,24 @@ L_register_copy_done:
 
        slwi    t4,itmp3,3                /* XXX use 8-bytes slots for now        */
        addi    t4,t4,LA_SIZE             /* add size of linkage area             */
-
-#if defined(__DARWIN__)
-       slwi    t5,t7,2                   /* add stack space for arguments        */
-       add     t4,t4,t5
-#endif
-
        sub     sp,sp,t4
 
        mr      t6,sp                     /* use t6 as temporary sp               */
        addi    t6,t6,LA_SIZE             /* skip linkage area                    */
-#if defined(__DARWIN__)
-       add     t6,t6,t5                  /* skip stack space for arguments       */
-#endif
 
        addi    itmp1,itmp1,-sizevmarg    /* initialize pointer (smaller code)    */
        addi    itmp2,itmp2,1             /* initialize argument count            */
+       li      t3,8                      /* initialize PA counter                */
+       addi    t6,t6,-8                  /* make code simpler                    */
        
 L_stack_copy_loop:
        addi    itmp1,itmp1,sizevmarg     /* goto next argument block             */
        addi    itmp2,itmp2,-1            /* argument count - 1                   */
        mr.     itmp2,itmp2
        beq     L_stack_copy_done
-       
+       addi    t6,t6,8                   /* increase stack */
+L_stack_not_uses_PA:
+
        lwz     itmp3,offvmargtype+4(itmp1)
        andi.   r0,itmp3,0x0002           /* is this a float/double type?         */
        bne     L_stack_handle_float
@@ -341,22 +334,13 @@ L_stack_handle_int:
        andi.   r0,itmp3,0x0001           /* is this a 2-word type?               */
        bne     L_stack_handle_long
 
-       lwz     itmp3,offvmargdata+4(itmp1) /* get integer argument               */
-       stw     itmp3,0(t6)               /* and store it on the stack            */
-       addi    t6,t6,4                   /* increase temporary sp by 1 slot      */
+       lwa     itmp3,offvmargdata+4(itmp1) /* get integer argument               */
+       std     itmp3,0(t6)               /* and store it on the stack            */
        b       L_stack_copy_loop
 
 L_stack_handle_long:
-#if !defined(__DARWIN__)
-       addi    t6,t6,4                   /* align stack to 8-bytes               */
-       rlwinm  t6,t6,0,30,28             /* clear lower 4-bits                   */
-#endif
-
-       lwz     itmp3,offvmargdata+0(itmp1) /* get long argument                  */
-       stw     itmp3,0(t6)               /* and store it on the stack            */
-       lwz     itmp3,offvmargdata+4(itmp1)
-       stw     itmp3,4(t6)
-       addi    t6,t6,8                   /* increase temporary sp by 2 slots     */
+       ld      itmp3,offvmargdata+0(itmp1) /* get long argument                  */
+       std     itmp3,0(t6)               /* and store it on the stack            */
        b       L_stack_copy_loop
                
 L_stack_handle_float:
@@ -368,35 +352,31 @@ L_stack_handle_float:
        bne     L_stack_handle_double
 
        lfs     ftmp3,offvmargdata(itmp1) /* get float argument                   */
-       stfs    ftmp3,0(t6)               /* and store it on the stack            */
-       addi    t6,t6,4                   /* increase temporary sp by 1 slot      */
+       stfd    ftmp3,0(t6)               /* and store it on the stack            */
        b       L_stack_copy_loop
 
 L_stack_handle_double:
-#if !defined(__DARWIN__)
-       addi    t6,t6,4                   /* align stack to 8-bytes               */
-       rlwinm  t6,t6,0,30,28             /* clear lower 4-bits                   */
-#endif
-
        lfd     ftmp3,offvmargdata(itmp1) /* get double argument                  */
        stfd    ftmp3,0(t6)               /* and store it on the stack            */
-       addi    t6,t6,8                   /* increase temporary sp by 2 slots     */
        b       L_stack_copy_loop
 
 L_stack_copy_done:
-       lwz     itmp1,9*4(s0)             /* pass method pointer via tmp1         */
+       ld      itmp1,9*8(s0)             /* pass method pointer via tmp1         */
 
 #if defined(__DARWIN__)
        addis   mptr,t3,ha16(L_asm_call_jit_compiler - L_asm_vm_call_method_get_pc)
        la      mptr,lo16(L_asm_call_jit_compiler - L_asm_vm_call_method_get_pc)(mptr)
 #else
-       lis     mptr,L_asm_call_jit_compiler@ha
-       addi    mptr,mptr,L_asm_call_jit_compiler@l
+       lis     mptr,L_asm_call_jit_compiler@highest    /* load 64bit address   */
+       ori     mptr,mptr,L_asm_call_jit_compiler@higher
+       rldicr  mptr,mptr,32,31
+       oris    mptr,mptr,L_asm_call_jit_compiler@h
+       ori     mptr,mptr,L_asm_call_jit_compiler@l
 #endif
-       stw     mptr,7*4(s0)
-       addi    mptr,s0,7*4
+       std     mptr,7*8(s0)
+       addi    mptr,s0,7*8
 
-       lwz     pv,0*4(mptr)
+       ld      pv,0*8(mptr)
        mtctr   pv
        bctrl
 1:
@@ -410,16 +390,16 @@ L_stack_copy_done:
 L_asm_vm_call_method_return:
        mr      sp,s0                     /* restore the function's sp            */
 
-       lwz     s0,8*4(sp)                /* restore used callee saved registers  */
+       ld      s0,8*8(sp)                /* restore used callee saved registers  */
 
 #if defined(__DARWIN__)
        lwz     itmp1,10*4(sp)            /* register r11 is callee saved         */
 #endif
-       lwz     pv,11*4(sp)               /* save PV register                     */
+       ld      pv,11*8(sp)               /* save PV register                     */
 
-       lwz     itmp3,12*4(sp)
-       lfd     ftmp1,14*4(sp)            /* registers f14-f31 are callee saved   */
-       lfd     ftmp2,16*4(sp)
+       ld      itmp3,12*8(sp)
+       lfd     ftmp1,13*8(sp)            /* registers f14-f31 are callee saved   */
+       lfd     ftmp2,14*8(sp)
 
 #if defined(__DARWIN__)
        lwz     t1,18*4(r1)
@@ -437,12 +417,12 @@ L_asm_vm_call_method_return:
        lfd     ft4,34*4(r1)
        lfd     ft5,36*4(r1)
 #else
-       RESTORE_TEMPORARY_REGISTERS(18)   /* the offset has to be even            */
+       RESTORE_TEMPORARY_REGISTERS(15)   /* the offset has to be even            */
 #endif
 
-       lwz     r0,40*4+LA_LR_OFFSET(r1)
+       ld     r0,40*8+LA_LR_OFFSET(r1)
        mtlr    r0
-       addi    r1,r1,40*4
+       addi    r1,r1,40*8
        blr
 
 asm_vm_call_method_exception_handler:
@@ -452,7 +432,7 @@ asm_vm_call_method_exception_handler:
 
 
        .data
-       .align  2
+       .align  8
 
 L_jumptable_int:
        .quad   L_handle_a0
@@ -465,100 +445,77 @@ L_jumptable_int:
        .quad   L_handle_a7
 
        .text
-       .align  2
+       .align  4
 
 L_handle_a0:
-       lwz     a0,offvmargdata+4(itmp1)
+       lwa     a0,offvmargdata+4(itmp1)
        b       L_register_copy
 L_handle_a1:
-       lwz     a1,offvmargdata+4(itmp1)
+       lwa     a1,offvmargdata+4(itmp1)
        b       L_register_copy
 L_handle_a2:
-       lwz     a2,offvmargdata+4(itmp1)
+       lwa     a2,offvmargdata+4(itmp1)
        b       L_register_copy
 L_handle_a3:
-       lwz     a3,offvmargdata+4(itmp1)
+       lwa     a3,offvmargdata+4(itmp1)
        b       L_register_copy
 L_handle_a4:
-       lwz     a4,offvmargdata+4(itmp1)
+       lwa     a4,offvmargdata+4(itmp1)
        b       L_register_copy
 L_handle_a5:
-       lwz     a5,offvmargdata+4(itmp1)
+       lwa     a5,offvmargdata+4(itmp1)
        b       L_register_copy
 L_handle_a6:
-       lwz     a6,offvmargdata+4(itmp1)
+       lwa     a6,offvmargdata+4(itmp1)
        b       L_register_copy
 L_handle_a7:
-       lwz     a7,offvmargdata+4(itmp1)
+       lwa     a7,offvmargdata+4(itmp1)
        b       L_register_copy
 
 
        .data
-       .align  2
+       .align  8
 
 L_jumptable_long:
-#if defined(__DARWIN__)
-       .quad   L_handle_a0_a1
-       .quad   L_handle_a1_a2
-       .quad   L_handle_a2_a3
-       .quad   L_handle_a3_a4
-       .quad   L_handle_a4_a5
-       .quad   L_handle_a5_a6
-       .quad   L_handle_a6_a7
-#else
-       /* we have two entries here, so we get the even argument register
-       alignment for linux */
-
-       .quad   L_handle_a0_a1
-       .quad   0
-       .quad   L_handle_a2_a3
-       .quad   0
-       .quad   L_handle_a4_a5
-       .quad   0
-       .quad   L_handle_a6_a7
-#endif
+       .quad   L_handle_a0l
+       .quad   L_handle_a1l
+       .quad   L_handle_a2l
+       .quad   L_handle_a3l
+       .quad   L_handle_a4l
+       .quad   L_handle_a5l
+       .quad   L_handle_a6l
+       .quad   L_handle_a7l
 
        .text
-       .align  2
+       .align  4
 
-L_handle_a0_a1:
-       lwz     a0,offvmargdata+0(itmp1)
-       lwz     a1,offvmargdata+4(itmp1)
+L_handle_a0l:
+       ld     a0,offvmargdata(itmp1)
        b       L_register_copy
-#if defined(__DARWIN__)
-L_handle_a1_a2:
-       lwz     a1,offvmargdata+0(itmp1)
-       lwz     a2,offvmargdata+4(itmp1)
+L_handle_a1l:
+       ld     a1,offvmargdata(itmp1)
        b       L_register_copy
-#endif
-L_handle_a2_a3:
-       lwz     a2,offvmargdata+0(itmp1)
-       lwz     a3,offvmargdata+4(itmp1)
+L_handle_a2l:
+       ld     a2,offvmargdata(itmp1)
        b       L_register_copy
-#if defined(__DARWIN__)
-L_handle_a3_a4:
-       lwz     a3,offvmargdata+0(itmp1)
-       lwz     a4,offvmargdata+4(itmp1)
+L_handle_a3l:
+       ld     a3,offvmargdata(itmp1)
        b       L_register_copy
-#endif
-L_handle_a4_a5:
-       lwz     a4,offvmargdata+0(itmp1)
-       lwz     a5,offvmargdata+4(itmp1)
+L_handle_a4l:
+       ld     a4,offvmargdata(itmp1)
        b       L_register_copy
-#if defined(__DARWIN__)
-L_handle_a5_a6:
-       lwz     a5,offvmargdata+0(itmp1)
-       lwz     a6,offvmargdata+4(itmp1)
+L_handle_a5l:
+       ld     a5,offvmargdata(itmp1)
        b       L_register_copy
-#endif
-L_handle_a6_a7:
-       lwz     a6,offvmargdata+0(itmp1)
-       lwz     a7,offvmargdata+4(itmp1)
+L_handle_a6l:
+       ld     a6,offvmargdata(itmp1)
+       b       L_register_copy
+L_handle_a7l:
+       ld     a7,offvmargdata(itmp1)
        b       L_register_copy
-
 
        .data
-       .align  2
+       .align  8
 
 L_jumptable_float:
        .quad   L_handle_fa0
@@ -569,17 +526,14 @@ L_jumptable_float:
        .quad   L_handle_fa5
        .quad   L_handle_fa6
        .quad   L_handle_fa7
-
-#if defined(__DARWIN__)
        .quad   L_handle_fa8
        .quad   L_handle_fa9
        .quad   L_handle_fa10
        .quad   L_handle_fa11
        .quad   L_handle_fa12
-#endif
 
        .text
-       .align  2
+       .align  4
 
 L_handle_fa0:
        lfs     fa0,offvmargdata(itmp1)
@@ -605,8 +559,6 @@ L_handle_fa6:
 L_handle_fa7:
        lfs     fa7,offvmargdata(itmp1)
        b       L_register_copy
-
-#if defined(__DARWIN__)
 L_handle_fa8:
        lfs     fa8,offvmargdata(itmp1)
        b       L_register_copy
@@ -622,11 +574,9 @@ L_handle_fa11:
 L_handle_fa12:
        lfs     fa12,offvmargdata(itmp1)
        b       L_register_copy
-#endif
-
 
        .data
-       .align  2
+       .align  8
 
 L_jumptable_double:
        .quad   L_handle_fda0
@@ -637,17 +587,14 @@ L_jumptable_double:
        .quad   L_handle_fda5
        .quad   L_handle_fda6
        .quad   L_handle_fda7
-
-#if defined(__DARWIN__)
        .quad   L_handle_fda8
        .quad   L_handle_fda9
        .quad   L_handle_fda10
        .quad   L_handle_fda11
        .quad   L_handle_fda12
-#endif
 
        .text
-       .align  2
+       .align  4
 
 L_handle_fda0:
        lfd     fa0,offvmargdata(itmp1)
@@ -673,8 +620,6 @@ L_handle_fda6:
 L_handle_fda7:
        lfd     fa7,offvmargdata(itmp1)
        b       L_register_copy
-
-#if defined(__DARWIN__)
 L_handle_fda8:
        lfd     fa8,offvmargdata(itmp1)
        b       L_register_copy
@@ -690,8 +635,9 @@ L_handle_fda11:
 L_handle_fda12:
        lfd     fa12,offvmargdata(itmp1)
        b       L_register_copy
-#endif
 
+asm_vm_call_method_end:
+       nop
 
 /* asm_call_jit_compiler *******************************************************
 
@@ -702,74 +648,75 @@ L_handle_fda12:
 asm_call_jit_compiler:
 L_asm_call_jit_compiler:                /* required for PIC code              */
        mflr    r0
-       stw     r0,LA_LR_OFFSET(r1)         /* save return address                */
-       stwu    r1,-(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)(r1)
+       std     r0,LA_LR_OFFSET(sp)         /* save return address                */
+       stdu    r1,-(LA_SIZE+PA_SIZE+ARG_CNT*8)(sp)
 
 #if defined(__DARWIN__)
-       stw     a0,(LA_WORD_SIZE+5+0)*4(r1)
-       stw     a1,(LA_WORD_SIZE+5+1)*4(r1)
-       stw     a2,(LA_WORD_SIZE+5+2)*4(r1)
-       stw     a3,(LA_WORD_SIZE+5+3)*4(r1)
-       stw     a4,(LA_WORD_SIZE+5+4)*4(r1)
-       stw     a5,(LA_WORD_SIZE+5+5)*4(r1)
-       stw     a6,(LA_WORD_SIZE+5+6)*4(r1)
-       stw     a7,(LA_WORD_SIZE+5+7)*4(r1)
-
-       stfd    fa0,(LA_WORD_SIZE+5+8)*4(r1)
-       stfd    fa1,(LA_WORD_SIZE+5+10)*4(r1)
-       stfd    fa2,(LA_WORD_SIZE+5+12)*4(r1)
-       stfd    fa3,(LA_WORD_SIZE+5+14)*4(r1)
-       stfd    fa4,(LA_WORD_SIZE+5+16)*4(r1)
-       stfd    fa5,(LA_WORD_SIZE+5+18)*4(r1)
-       stfd    fa6,(LA_WORD_SIZE+5+20)*4(r1)
-       stfd    fa7,(LA_WORD_SIZE+5+22)*4(r1)
-       stfd    fa8,(LA_WORD_SIZE+5+24)*4(r1)
-       stfd    fa9,(LA_WORD_SIZE+5+26)*4(r1)
-       stfd    fa10,(LA_WORD_SIZE+5+28)*4(r1)
-       stfd    fa11,(LA_WORD_SIZE+5+30)*4(r1)
-       stfd    fa12,(LA_WORD_SIZE+5+32)*4(r1)
+       stw     a0,LA_SIZE+(5+0)*8(r1)
+       stw     a1,LA_SIZE+(5+1)*8(r1)
+       stw     a2,LA_SIZE+(5+2)*8(r1)
+       stw     a3,LA_SIZE+(5+3)*8(r1)
+       stw     a4,LA_SIZE+(5+4)*8(r1)
+       stw     a5,LA_SIZE+(5+5)*8(r1)
+       stw     a6,LA_SIZE+(5+6)*8(r1)
+       stw     a7,LA_SIZE+(5+7)*8(r1)
+
+       stfd    fa0,LA_SIZE+(5+8)*8(r1)
+       stfd    fa1,LA_SIZE+(5+10)*8(r1)
+       stfd    fa2,LA_SIZE+(5+12)*8(r1)
+       stfd    fa3,LA_SIZE+(5+14)*8(r1)
+       stfd    fa4,LA_SIZE+(5+16)*8(r1)
+       stfd    fa5,LA_SIZE+(5+18)*8(r1)
+       stfd    fa6,LA_SIZE+(5+20)*8(r1)
+       stfd    fa7,LA_SIZE+(5+22)*8(r1)
+       stfd    fa8,LA_SIZE+(5+24)*8(r1)
+       stfd    fa9,LA_SIZE+(5+26)*8(r1)
+       stfd    fa10,LA_SIZE+(5+28)*8(r1)
+       stfd    fa11,LA_SIZE+(5+30)*8(r1)
+       stfd    fa12,LA_SIZE+(5+32)*8(r1)
 #else
-       SAVE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1)
+       SAVE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS)
 #endif
 
        mr      a0,itmp1
        mr      a1,mptr
-       addi    a2,sp,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)
-       lwz     a3,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)+LA_LR_OFFSET(sp)
+       addi    a2,sp,(LA_SIZE + PA_SIZE+ ARG_CNT*8)
+       ld      a3,(LA_SIZE + PA_SIZE + ARG_CNT*8)+LA_LR_OFFSET(sp)
        bl      jit_asm_compile
+       ori     r0,r0,0                     /* nop needed after jump to function desc. */
        mr      pv,v0                       /* move address to pv register        */
 
 #if defined(__DARWIN__)
-       lwz     a0,(LA_WORD_SIZE+5+0)*4(r1)
-       lwz     a1,(LA_WORD_SIZE+5+1)*4(r1)
-       lwz     a2,(LA_WORD_SIZE+5+2)*4(r1)
-       lwz     a3,(LA_WORD_SIZE+5+3)*4(r1)
-       lwz     a4,(LA_WORD_SIZE+5+4)*4(r1)
-       lwz     a5,(LA_WORD_SIZE+5+5)*4(r1)
-       lwz     a6,(LA_WORD_SIZE+5+6)*4(r1)
-       lwz     a7,(LA_WORD_SIZE+5+7)*4(r1)
-
-       lfd     fa0,(LA_WORD_SIZE+5+8)*4(r1)
-       lfd     fa1,(LA_WORD_SIZE+5+10)*4(r1)
-       lfd     fa2,(LA_WORD_SIZE+5+12)*4(r1)
-       lfd     fa3,(LA_WORD_SIZE+5+14)*4(r1)
-       lfd     fa4,(LA_WORD_SIZE+5+16)*4(r1)
-       lfd     fa5,(LA_WORD_SIZE+5+18)*4(r1)
-       lfd     fa6,(LA_WORD_SIZE+5+20)*4(r1)
-       lfd     fa7,(LA_WORD_SIZE+5+22)*4(r1)
-       lfd     fa8,(LA_WORD_SIZE+5+24)*4(r1)
-       lfd     fa9,(LA_WORD_SIZE+5+26)*4(r1)
-       lfd     fa10,(LA_WORD_SIZE+5+28)*4(r1)
-       lfd     fa11,(LA_WORD_SIZE+5+30)*4(r1)
-       lfd     fa12,(LA_WORD_SIZE+5+32)*4(r1)
+       lwz     a0,LA_SIZE+(+5+0)*8(r1)
+       lwz     a1,LA_SIZE+(+5+1)*8(r1)
+       lwz     a2,LA_SIZE+(+5+2)*8(r1)
+       lwz     a3,LA_SIZE+(+5+3)*8(r1)
+       lwz     a4,LA_SIZE+(+5+4)*8(r1)
+       lwz     a5,LA_SIZE+(+5+5)*8(r1)
+       lwz     a6,LA_SIZE+(+5+6)*8(r1)
+       lwz     a7,LA_SIZE+(+5+7)*8(r1)
+
+       lfd     fa0,LA_SIZE+(+5+8)*8(r1)
+       lfd     fa1,LA_SIZE+(+5+10)*8(r1)
+       lfd     fa2,LA_SIZE+(+5+12)*8(r1)
+       lfd     fa3,LA_SIZE+(+5+14)*8(r1)
+       lfd     fa4,LA_SIZE+(+5+16)*8(r1)
+       lfd     fa5,LA_SIZE+(+5+18)*8(r1)
+       lfd     fa6,LA_SIZE+(+5+20)*8(r1)
+       lfd     fa7,LA_SIZE+(+5+22)*8(r1)
+       lfd     fa8,LA_SIZE+(+5+24)*8(r1)
+       lfd     fa9,LA_SIZE+(+5+26)*8(r1)
+       lfd     fa10,LA_SIZE+(+5+28)*8(r1)
+       lfd     fa11,LA_SIZE+(+5+30)*8(r1)
+       lfd     fa12,LA_SIZE+(+5+32)*8(r1)
 #else
-       RESTORE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1)
+       RESTORE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS)
 #endif
 
-       lwz     itmp1,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)+LA_LR_OFFSET(r1)
-       mtlr    itmp1
+       ld     itmp1,(LA_SIZE + PA_SIZE + ARG_CNT*8)+LA_LR_OFFSET(sp)
+       mtlr   itmp1
 
-       addi    sp,sp,(LA_SIZE + 5*4 + INT_ARG_CNT*4 + FLT_ARG_CNT*8)
+       addi    sp,sp,(LA_SIZE + PA_SIZE + ARG_CNT*8)
 
        mr.     pv,pv                       /* test for exception                 */
        beq     L_asm_call_jit_compiler_exception
@@ -779,10 +726,10 @@ L_asm_call_jit_compiler:                /* required for PIC code              */
 
 L_asm_call_jit_compiler_exception:
        mflr    r0
-       stw     r0,LA_LR_OFFSET(sp)
-       stwu    sp,-LA_SIZE_ALIGNED(sp)     /* preserve linkage area              */
+       std     r0,LA_LR_OFFSET(sp)
+       stdu    sp,-LA_SIZE_ALIGNED(sp)     /* preserve linkage area              */
        bl      exceptions_get_and_clear_exception
-       lwz     xpc,LA_SIZE_ALIGNED+LA_LR_OFFSET(sp)
+       ld      xpc,LA_SIZE_ALIGNED+LA_LR_OFFSET(sp)
        mtlr    xpc     
        addi    sp,sp,LA_SIZE_ALIGNED
 
@@ -805,17 +752,26 @@ L_asm_call_jit_compiler_exception:
                
 asm_handle_nat_exception:
 L_asm_handle_nat_exception:             /* required for PIC code              */
-       mflr    r9
-       lwz     itmp3,4(r9)
-       extsh   itmp3,itmp3
-       add     pv,itmp3,r9
-       lwz     itmp3,8(r9)
-       srwi    itmp3,itmp3,16
-       cmpwi   itmp3,0x3dad
-       bne     L_asm_handle_exception
-       lwz     itmp3,8(r9)
-       slwi    itmp3,itmp3,16
-       add     pv,pv,itmp3
+L_asm_handle_exception_stack_loop:
+       mflr    r0
+       addi    sp,sp,-(LA_SIZE+PA_SIZE+((4+6)*8))  /* allocate stack (+4 for darwin)     */
+       std     xptr,LA_SIZE+PA_SIZE+(4+0)*8(sp)    /* save exception pointer             */
+       std     xpc,LA_SIZE+PA_SIZE+(4+1)*8(sp)     /* save exception pc                  */
+       std     r0,LA_SIZE+PA_SIZE+(4+3)*8(sp)      /* save return address                */
+       li      itmp3,0
+       std     itmp3,LA_SIZE+PA_SIZE+(4+4)*8(sp)   /* save maybe-leaf flag (cleared)     */
+
+       ld      a0,LA_SIZE+PA_SIZE+(4+3)*8(sp)      /* pass return address (reload: r0 is clobbered if itmp3 == r0) */
+       bl      md_codegen_get_pv_from_pc   /* get PV from RA                     */
+       std     v0,LA_SIZE+PA_SIZE+(4+2)*8(sp)      /* save data segment pointer          */
+
+       ld      a0,LA_SIZE+PA_SIZE+(4+0)*8(sp)      /* pass xptr                          */
+       ld      a1,LA_SIZE+PA_SIZE+(4+1)*8(sp)      /* pass xpc                           */
+       ld      a2,LA_SIZE+PA_SIZE+(4+2)*8(sp)      /* pass PV (v0 == a0)                 */
+       addi    a3,sp,LA_SIZE+PA_SIZE+((4+6)*8)     /* pass Java SP                       */
+
+       b       L_asm_handle_exception_continue
+
 
 asm_handle_exception:
 L_asm_handle_exception:                 /* required for PIC code              */
@@ -827,38 +783,35 @@ L_asm_handle_exception:                 /* required for PIC code              */
        SAVE_TEMPORARY_REGISTERS(ARG_CNT)   /* case this is a leaf method         */
 #endif
 
-       li      a3,(ARG_CNT+TMP_CNT)*8      /* prepare a3 for handle_exception    */
-       li      a4,1                        /* set maybe-leaf flag                */
-
-L_asm_handle_exception_stack_loop:
-       addi    sp,sp,-(LA_WORD_SIZE+4+5)*4 /* allocate stack                     */
-       stw     xptr,LA_SIZE+4*4(sp)        /* save exception pointer             */
-       stw     xpc,LA_SIZE+5*4(sp)         /* save exception pc                  */
-       stw     pv,LA_SIZE+6*4(sp)          /* save data segment pointer          */
-       mflr    r0                          /* save return address                */
-       stw     r0,LA_SIZE+5*4(sp)
-       add     a3,a3,sp                    /* calculate Java sp into a3...       */
-       addi    a3,a3,(LA_WORD_SIZE+4+5)*4
-       stw     a4,LA_SIZE+8*4(sp)          /* save maybe-leaf flag               */
-
+       addi    sp,sp,-(LA_SIZE+PA_SIZE+(4+6)*8)        /* allocate stack                     */
+       std     xptr,LA_SIZE+PA_SIZE+(4+0)*8(sp)        /* save exception pointer             */
+       std     pv,LA_SIZE+PA_SIZE+(4+2)*8(sp)          /* save data segment pointer          */
+       mflr    r0                                      /* save return address                */
+       std     r0,LA_SIZE+PA_SIZE+(4+3)*8(sp)          
+       li      t0, 1
+       std     t0, LA_SIZE+PA_SIZE+(4+4)*8(sp)         /* maybe-leaf flag */
+       
        mr      a0,xptr                     /* pass exception pointer             */
        mr      a1,xpc                      /* pass exception pc                  */
        mr      a2,pv                       /* pass data segment pointer          */
-                                           /* a3 is still set                    */
+       addi    a3,sp,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+(4+6)*8
+
+
+L_asm_handle_exception_continue:
        bl      exceptions_handle_exception
 
        mr.     v0,v0
        beq     L_asm_handle_exception_not_catched
 
-       mr      xpc,v0                      /* move handlerpc into xpc            */
-       lwz     xptr,LA_SIZE+4*4(sp)        /* restore exception pointer          */
-       lwz     pv,LA_SIZE+6*4(sp)          /* restore data segment pointer       */
-       lwz     r0,LA_SIZE+5*4(sp)          /* restore return address             */
+       mr      xpc,v0                              /* move handlerpc into xpc            */
+       ld      xptr,LA_SIZE+PA_SIZE+(4+0)*8(sp)    /* restore exception pointer          */
+       ld      pv,LA_SIZE+PA_SIZE+(4+2)*8(sp)      /* restore data segment pointer       */
+       ld      r0,LA_SIZE+PA_SIZE+(4+3)*8(sp)      /* restore return address             */
        mtlr    r0
-       lwz     a4,LA_SIZE+8*4(sp)          /* get maybe-leaf flag                */
-       addi    sp,sp,(LA_WORD_SIZE+4+5)*4  /* free stack frame                   */
+       ld      t0,LA_SIZE+PA_SIZE+(4+4)*8(sp)      /* get maybe-leaf flag                */
+       addi    sp,sp,LA_SIZE+PA_SIZE+(4+6)*8       /* free stack frame                   */
 
-       mr.     a4,a4
+       mr.     t0,t0
        beq     L_asm_handle_exception_no_leaf
 
 #if defined(__DARWIN__)
@@ -874,102 +827,88 @@ L_asm_handle_exception_no_leaf:
        bctr
 
 L_asm_handle_exception_not_catched:
-       lwz     xptr,LA_SIZE+4*4(sp)        /* restore exception pointer          */
-       lwz     pv,LA_SIZE+6*4(sp)          /* restore data segment pointer       */
-       lwz     r0,LA_SIZE+5*4(sp)          /* restore return address             */
+       ld      xptr,LA_SIZE+PA_SIZE+(4+0)*8(sp)        /* restore exception pointer          */
+       ld      pv,LA_SIZE+PA_SIZE+(4+2)*8(sp)          /* restore data segment pointer       */
+       ld      r0,LA_SIZE+PA_SIZE+(4+3)*8(sp)          /* restore return address             */
        mtlr    r0
-       lwz     a4,LA_SIZE+8*4(sp)          /* get maybe-leaf flag                */
-       addi    sp,sp,(LA_WORD_SIZE+4+5)*4  /* free stack frame                   */
+       ld      t0,LA_SIZE+PA_SIZE+(4+4)*8(sp)          /* get maybe-leaf flag                */
+       addi    sp,sp,LA_SIZE+PA_SIZE+(4+6)*8           /* free stack frame                   */
 
-       mr.     a4,a4
+       mr.     t0,t0
        beq     L_asm_handle_exception_no_leaf_stack
 
        addi    sp,sp,(ARG_CNT+TMP_CNT)*8   /* remove maybe-leaf stackframe       */
-       li      a4,0                        /* clear the maybe-leaf flag          */
+       li      t0,0                        /* clear the maybe-leaf flag          */
 
 L_asm_handle_exception_no_leaf_stack:
-       lwz     t0,FrameSize(pv)            /* get frame size                     */
-       add     t0,sp,t0                    /* pointer to save area               */
+       lwz     t1,FrameSize(pv)            /* get frame size                     */
+       add     t1,sp,t1                    /* pointer to save area               */
 
-       lwz     t1,IsLeaf(pv)               /* is leaf procedure                  */
-       mr.     t1,t1
+       lwz     t2,IsLeaf(pv)               /* is leaf procedure                  */
+       mr.     t2,t2
        bne     L_asm_handle_exception_no_ra_restore
 
-       lwz     r0,LA_LR_OFFSET(t0)         /* restore ra                         */
+       ld      r0,LA_LR_OFFSET(t1)         /* restore ra                         */
        mtlr    r0
 
 L_asm_handle_exception_no_ra_restore:
        mflr    xpc                         /* the new xpc is ra                  */
-       lwz     t1,IntSave(pv)              /* t1 = saved int register count      */
+       mr      t4,xpc                      /* save RA */
+       lwz     t2,IntSave(pv)              /* t1 = saved int register count      */
        bl      ex_int1
 ex_int1:
-       mflr    t2                          /* t2 = current pc                    */
+       mflr    t3                          /* t3 = current pc                    */
 #if defined(__DARWIN__)
-       addi    t2,t2,lo16(ex_int2-ex_int1)
+       addi    t3,t3,lo16(ex_int2-ex_int1)
 #else
-       addi    t2,t2,(ex_int2-ex_int1)@l
+       addi    t3,t3,(ex_int2-ex_int1)@l
 #endif
-       slwi    t1,t1,2                     /* t1 = register count * 4            */
-       subf    t2,t1,t2                    /* t2 = IntSave - t1                  */
-       mtctr   t2
+       slwi    t2,t2,2                     /* t2 = register count * 4            */
+       subf    t3,t2,t3                    /* t3 = IntSave - t2                  */
+       mtctr   t3
        bctr
 
-       lwz     s0,-10*4(t0)
-       lwz     s1,-9*4(t0)
-       lwz     s2,-8*4(t0)
-       lwz     s3,-7*4(t0)
-       lwz     s4,-6*4(t0)
-       lwz     s5,-5*4(t0)
-       lwz     s6,-4*4(t0)
-       lwz     s7,-3*4(t0)
-       lwz     s8,-2*4(t0)
-       lwz     s9,-1*4(t0)
+       ld      s0,-9*8(t1)
+       ld      s1,-8*8(t1)
+       ld      s2,-7*8(t1)
+       ld      s3,-6*8(t1)
+       ld      s4,-5*8(t1)
+       ld      s5,-4*8(t1)
+       ld      s6,-3*8(t1)
+       ld      s7,-2*8(t1)
+       ld      s8,-1*8(t1)
 
 ex_int2:
-       subf    t0,t1,t0                    /* t0 = t0 - register count * 4       */
-
-       lwz     t1,FltSave(pv)
+       subf    t1,t2,t1                    /* t1 = t1 - register count * 4       */
+       lwz     t2,FltSave(pv)
        bl      ex_flt1
 ex_flt1:
-       mflr    t2
+       mflr    t3
 #if defined(__DARWIN__)
-       addi    t2,t2,lo16(ex_flt2-ex_flt1)
+       addi    t3,t3,lo16(ex_flt2-ex_flt1)
 #else
-       addi    t2,t2,(ex_flt2-ex_flt1)@l
+       addi    t3,t3,(ex_flt2-ex_flt1)@l
 #endif
-       slwi    t1,t1,2                     /* t1 = register count * 4            */
-       subf    t2,t1,t2                    /* t2 = FltSave - t1                  */
-       mtctr   t2
+       slwi    t2,t2,2                     /* t2 = register count * 4            */
+       subf    t3,t2,t3                    /* t3 = FltSave - t2                  */
+       mtctr   t3
        bctr
 
-       lfd     fs0,-10*8(t0)
-       lfd     fs1,-9*8(t0)
-       lfd     fs2,-8*8(t0)
-       lfd     fs3,-7*8(t0)
-       lfd     fs4,-6*8(t0)
-       lfd     fs5,-5*8(t0)
-       lfd     fs6,-4*8(t0)
-       lfd     fs7,-3*8(t0)
-       lfd     fs8,-2*8(t0)
-       lfd     fs9,-1*8(t0)
+       lfd     fs0,-10*8(t1)
+       lfd     fs1,-9*8(t1)
+       lfd     fs2,-8*8(t1)
+       lfd     fs3,-7*8(t1)
+       lfd     fs4,-6*8(t1)
+       lfd     fs5,-5*8(t1)
+       lfd     fs6,-4*8(t1)
+       lfd     fs7,-3*8(t1)
+       lfd     fs8,-2*8(t1)
+       lfd     fs9,-1*8(t1)
 
 ex_flt2:
-       lwz     t0,FrameSize(pv)            /* get frame size                     */
-       add     sp,sp,t0                    /* unwind stack                       */
-       li      a3,0                        /* prepare a3 for handle_exception    */
-
-       mtlr    xpc
-       lwz     itmp3,4(xpc)
-       extsh   itmp3,itmp3
-       add     pv,itmp3,xpc
-       lwz     itmp3,8(xpc)
-       srwi    itmp3,itmp3,16
-       cmpwi   itmp3,0x3dad
-       bne     L_asm_handle_exception_stack_loop
-       lwz     itmp3,8(xpc)
-       slwi    itmp3,itmp3,16
-       add     pv,pv,itmp3
-
+       mtlr    t4                                   /* restore RA */
+       lwz     t1,FrameSize(pv)                     
+       add     sp,sp,t1                             /* unwind stack */
        b       L_asm_handle_exception_stack_loop
 
 
@@ -1001,145 +940,146 @@ asm_abstractmethoderror:
    XXX
 
    Stack layout:
-     20   return address into JIT code (patch position)
-     16   pointer to virtual java_objectheader
-     12   machine code (which is patched back later)
-      8   unresolved class/method/field reference
-      4   data segment displacement from load instructions
+     40   return address into JIT code (patch position)
+     32   pointer to virtual java_objectheader
+     24   machine code (which is patched back later)
+     16   unresolved class/method/field reference
+      8   data segment displacement from load instructions
       0   patcher function pointer to call (pv is saved here afterwards)
 
 *******************************************************************************/
 
 asm_patcher_wrapper:
        mflr    r0                    /* get Java return address (leaf)           */
-       stw     r0,6*4(sp)            /* store it in the stub stackframe          */
+       std     r0,6*8(sp)            /* store it in the stub stackframe          */
                                      /* keep stack 16-bytes aligned: 6+1+37 = 44 */
-       stwu    sp,-(LA_SIZE+(5+58)*4)(sp)
+       stdu    sp,-(LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8)(sp)
 
 #if defined(__DARWIN__)
-       stw     a0,LA_SIZE+(5+0)*4(r1)      /* save argument registers            */
-       stw     a1,LA_SIZE+(5+1)*4(r1)      /* preserve linkage area (24 bytes)   */
-       stw     a2,LA_SIZE+(5+2)*4(r1)      /* and 4 bytes for 4 argument         */
-       stw     a3,LA_SIZE+(5+3)*4(r1)
-       stw     a4,LA_SIZE+(5+4)*4(r1)
-       stw     a5,LA_SIZE+(5+5)*4(r1)
-       stw     a6,LA_SIZE+(5+6)*4(r1)
-       stw     a7,LA_SIZE+(5+7)*4(r1)
-
-       stfd    fa0,LA_SIZE+(5+8)*4(sp)
-       stfd    fa1,LA_SIZE+(5+10)*4(sp)
-       stfd    fa2,LA_SIZE+(5+12)*4(sp)
-       stfd    fa3,LA_SIZE+(5+14)*4(sp)
-       stfd    fa4,LA_SIZE+(5+16)*4(sp)
-       stfd    fa5,LA_SIZE+(5+18)*4(sp)
-       stfd    fa6,LA_SIZE+(5+20)*4(sp)
-       stfd    fa7,LA_SIZE+(5+22)*4(sp)
-       stfd    fa8,LA_SIZE+(5+24)*4(sp)
-       stfd    fa9,LA_SIZE+(5+26)*4(sp)
-       stfd    fa10,LA_SIZE+(5+28)*4(sp)
-       stfd    fa11,LA_SIZE+(5+30)*4(sp)
-       stfd    fa12,LA_SIZE+(5+32)*4(sp)
-
-       stw     t0,(LA_WORD_SIZE+5+33)*4(r1)
-       stw     t1,(LA_WORD_SIZE+5+34)*4(r1)
-       stw     t2,(LA_WORD_SIZE+5+35)*4(r1)
-       stw     t3,(LA_WORD_SIZE+5+36)*4(r1)
-       stw     t4,(LA_WORD_SIZE+5+37)*4(r1)
-       stw     t5,(LA_WORD_SIZE+5+38)*4(r1)
-       stw     t6,(LA_WORD_SIZE+5+39)*4(r1)
-       stw     t7,(LA_WORD_SIZE+5+40)*4(r1)
-
-       stfd    ft0,(LA_WORD_SIZE+5+42)*4(r1)
-       stfd    ft1,(LA_WORD_SIZE+5+44)*4(r1)
-       stfd    ft2,(LA_WORD_SIZE+5+46)*4(r1)
-       stfd    ft3,(LA_WORD_SIZE+5+48)*4(r1)
-       stfd    ft4,(LA_WORD_SIZE+5+50)*4(r1)
-       stfd    ft5,(LA_WORD_SIZE+5+52)*4(r1)
+       std     a0,LA_SIZE+(5+0)*8(r1)      /* save argument registers            */
+       std     a1,LA_SIZE+(5+1)*8(r1)      /* (full 64-bit saves, 8-byte slots)  */
+       std     a2,LA_SIZE+(5+2)*8(r1)
+       std     a3,LA_SIZE+(5+3)*8(r1)
+       std     a4,LA_SIZE+(5+4)*8(r1)
+       std     a5,LA_SIZE+(5+5)*8(r1)
+       std     a6,LA_SIZE+(5+6)*8(r1)
+       std     a7,LA_SIZE+(5+7)*8(r1)
+
+       stfd    fa0,LA_SIZE+(5+8)*8(sp)
+       stfd    fa1,LA_SIZE+(5+10)*8(sp)
+       stfd    fa2,LA_SIZE+(5+12)*8(sp)
+       stfd    fa3,LA_SIZE+(5+14)*8(sp)
+       stfd    fa4,LA_SIZE+(5+16)*8(sp)
+       stfd    fa5,LA_SIZE+(5+18)*8(sp)
+       stfd    fa6,LA_SIZE+(5+20)*8(sp)
+       stfd    fa7,LA_SIZE+(5+22)*8(sp)
+       stfd    fa8,LA_SIZE+(5+24)*8(sp)
+       stfd    fa9,LA_SIZE+(5+26)*8(sp)
+       stfd    fa10,LA_SIZE+(5+28)*8(sp)
+       stfd    fa11,LA_SIZE+(5+30)*8(sp)
+       stfd    fa12,LA_SIZE+(5+32)*8(sp)       /* XXX */
+
+       std     t0,LA_SIZE+(+5+33)*8(r1)
+       std     t1,LA_SIZE+(+5+34)*8(r1)
+       std     t2,LA_SIZE+(+5+35)*8(r1)
+       std     t3,LA_SIZE+(+5+36)*8(r1)
+       std     t4,LA_SIZE+(+5+37)*8(r1)
+       std     t5,LA_SIZE+(+5+38)*8(r1)
+       std     t6,LA_SIZE+(+5+39)*8(r1)
+       std     t7,LA_SIZE+(+5+40)*8(r1)
+
+       stfd    ft0,LA_SIZE+(+5+42)*8(r1)
+       stfd    ft1,LA_SIZE+(+5+44)*8(r1)
+       stfd    ft2,LA_SIZE+(+5+46)*8(r1)
+       stfd    ft3,LA_SIZE+(+5+48)*8(r1)
+       stfd    ft4,LA_SIZE+(+5+50)*8(r1)
+       stfd    ft5,LA_SIZE+(+5+52)*8(r1)
 #else
-       SAVE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1) /* save 8 int/8 float arguments   */
-       SAVE_TEMPORARY_REGISTERS(LA_WORD_SIZE+1+24)
+       SAVE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS) /* save 8 int/8 float arguments   */
+       SAVE_TEMPORARY_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS+ARG_CNT)
 #endif
 
-       stw     itmp1,LA_SIZE+(5+54)*4(sp)
-       stw     itmp2,LA_SIZE+(5+55)*4(sp)
-       stw     pv,LA_SIZE+(5+56)*4(sp)
+       std     itmp1,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+1*8(sp)
+       std     itmp2,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+2*8(sp)
+       std     pv,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+3*8(sp)
 
-       addi    a0,sp,LA_SIZE+(5+58)*4      /* pass SP of patcher stub            */
+       addi    a0,sp,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8      /* pass SP of patcher stub            */
        mr      a1,pv                       /* pass PV                            */
        mr      a2,r0                       /* pass RA (correct for leafs)        */
        bl      patcher_wrapper
-       stw     v0,LA_SIZE+(5+57)*4(sp)     /* save return value                  */
+       std     v0,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+4*8(sp)     /* save return value                  */
 
 #if defined(__DARWIN__)
-       lwz     a0,LA_SIZE+(5+0)*4(r1)
-       lwz     a1,LA_SIZE+(5+1)*4(r1)
-       lwz     a2,LA_SIZE+(5+2)*4(r1)
-       lwz     a3,LA_SIZE+(5+3)*4(r1)
-       lwz     a4,LA_SIZE+(5+4)*4(r1)
-       lwz     a5,LA_SIZE+(5+5)*4(r1)
-       lwz     a6,LA_SIZE+(5+6)*4(r1)
-       lwz     a7,LA_SIZE+(5+7)*4(r1)
-
-       lfd     fa0,LA_SIZE+(5+8)*4(sp)
-       lfd     fa1,LA_SIZE+(5+10)*4(sp)
-       lfd     fa2,LA_SIZE+(5+12)*4(sp)
-       lfd     fa3,LA_SIZE+(5+14)*4(sp)
-       lfd     fa4,LA_SIZE+(5+16)*4(sp)
-       lfd     fa5,LA_SIZE+(5+18)*4(sp)
-       lfd     fa6,LA_SIZE+(5+20)*4(sp)
-       lfd     fa7,LA_SIZE+(5+22)*4(sp)
-       lfd     fa8,LA_SIZE+(5+24)*4(sp)
-       lfd     fa9,LA_SIZE+(5+26)*4(sp)
-       lfd     fa10,LA_SIZE+(5+28)*4(sp)
-       lfd     fa11,LA_SIZE+(5+30)*4(sp)
-       lfd     fa12,LA_SIZE+(5+32)*4(sp)
-
-       lwz     t0,(LA_WORD_SIZE+5+33)*4(r1)
-       lwz     t1,(LA_WORD_SIZE+5+34)*4(r1)
-       lwz     t2,(LA_WORD_SIZE+5+35)*4(r1)
-       lwz     t3,(LA_WORD_SIZE+5+36)*4(r1)
-       lwz     t4,(LA_WORD_SIZE+5+37)*4(r1)
-       lwz     t5,(LA_WORD_SIZE+5+38)*4(r1)
-       lwz     t6,(LA_WORD_SIZE+5+39)*4(r1)
-       lwz     t7,(LA_WORD_SIZE+5+40)*4(r1)
-
-       lfd     ft0,(LA_WORD_SIZE+5+42)*4(r1)
-       lfd     ft1,(LA_WORD_SIZE+5+44)*4(r1)
-       lfd     ft2,(LA_WORD_SIZE+5+46)*4(r1)
-       lfd     ft3,(LA_WORD_SIZE+5+48)*4(r1)
-       lfd     ft4,(LA_WORD_SIZE+5+50)*4(r1)
-       lfd     ft5,(LA_WORD_SIZE+5+52)*4(r1)
+       ld      a0,LA_SIZE+(5+0)*8(r1)
+       ld      a1,LA_SIZE+(5+1)*8(r1)
+       ld      a2,LA_SIZE+(5+2)*8(r1)
+       ld      a3,LA_SIZE+(5+3)*8(r1)
+       ld      a4,LA_SIZE+(5+4)*8(r1)
+       ld      a5,LA_SIZE+(5+5)*8(r1)
+       ld      a6,LA_SIZE+(5+6)*8(r1)
+       ld      a7,LA_SIZE+(5+7)*8(r1)
+
+       lfd     fa0,LA_SIZE+(5+8)*8(sp)
+       lfd     fa1,LA_SIZE+(5+10)*8(sp)
+       lfd     fa2,LA_SIZE+(5+12)*8(sp)
+       lfd     fa3,LA_SIZE+(5+14)*8(sp)
+       lfd     fa4,LA_SIZE+(5+16)*8(sp)
+       lfd     fa5,LA_SIZE+(5+18)*8(sp)
+       lfd     fa6,LA_SIZE+(5+20)*8(sp)
+       lfd     fa7,LA_SIZE+(5+22)*8(sp)
+       lfd     fa8,LA_SIZE+(5+24)*8(sp)
+       lfd     fa9,LA_SIZE+(5+26)*8(sp)
+       lfd     fa10,LA_SIZE+(5+28)*8(sp)
+       lfd     fa11,LA_SIZE+(5+30)*8(sp)
+       lfd     fa12,LA_SIZE+(5+32)*8(sp)
+
+       ld      t0,LA_SIZE+(+5+33)*8(r1)
+       ld      t1,LA_SIZE+(+5+34)*8(r1)
+       ld      t2,LA_SIZE+(+5+35)*8(r1)
+       ld      t3,LA_SIZE+(+5+36)*8(r1)
+       ld      t4,LA_SIZE+(+5+37)*8(r1)
+       ld      t5,LA_SIZE+(+5+38)*8(r1)
+       ld      t6,LA_SIZE+(+5+39)*8(r1)
+       ld      t7,LA_SIZE+(+5+40)*8(r1)
+
+       lfd     ft0,LA_SIZE+(+5+42)*8(r1)
+       lfd     ft1,LA_SIZE+(+5+44)*8(r1)
+       lfd     ft2,LA_SIZE+(+5+46)*8(r1)
+       lfd     ft3,LA_SIZE+(+5+48)*8(r1)
+       lfd     ft4,LA_SIZE+(+5+50)*8(r1)
+       lfd     ft5,LA_SIZE+(+5+52)*8(r1)
 #else
-       RESTORE_ARGUMENT_REGISTERS(LA_WORD_SIZE+1) /* restore 8 int/8 float args  */
-       RESTORE_TEMPORARY_REGISTERS(LA_WORD_SIZE+1+24)
+       RESTORE_ARGUMENT_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS) /* restore 8 int/8 float args  */
+       RESTORE_TEMPORARY_REGISTERS(LA_SIZE_IN_POINTERS+PA_SIZE_IN_POINTERS+ARG_CNT)
 #endif
 
-       lwz     itmp1,LA_SIZE+(5+54)*4(sp)
-       lwz     itmp2,LA_SIZE+(5+55)*4(sp)
-       lwz     pv,LA_SIZE+(5+56)*4(sp)
-       lwz     itmp3,LA_SIZE+(5+57)*4(sp)  /* restore return value into temp reg.*/
+       ld     itmp1,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+1*8(sp)
+       ld     itmp2,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+2*8(sp)
+       ld     pv,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+3*8(sp)
+       ld     itmp3,LA_SIZE+PA_SIZE+(ARG_CNT+TMP_CNT)*8+4*8(sp)
 
-       lwz     r0,(6+LA_WORD_SIZE+5+58)*4(sp) /* restore RA                      */
+       ld      r0,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8+6*8(sp) /* restore RA                      */
        mtlr    r0
 
        mr.     itmp3,itmp3           /* check for an exception                   */
        bne     L_asm_patcher_wrapper_exception
 
                                      /* get return address (into JIT code)       */
-       lwz     itmp3,(5+LA_WORD_SIZE+5+58)*4(sp)
+       ld     itmp3,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8+5*8(sp)
 
                                      /* remove stack frame + patcher stub stack  */
-       addi    sp,sp,(8+LA_WORD_SIZE+5+58)*4
+       addi    sp,sp,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8+8*8
 
        mtctr   itmp3
        bctr                          /* jump to new patched code                 */
 
 L_asm_patcher_wrapper_exception:
        mr      xptr,itmp3                  /* get exception                      */
-       lwz     xpc,(5+LA_WORD_SIZE+5+58)*4(sp)
-       addi    sp,sp,(8+LA_WORD_SIZE+5+58)*4
+       ld      xpc,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8+5*8(sp)
+       addi    sp,sp,LA_SIZE+PA_SIZE+ARG_CNT*8+TMP_CNT*8+4*8+8*8
        b       L_asm_handle_exception
 
+#if defined(ENABLE_REPLACEMENT)
 
 /* asm_replacement_out *********************************************************
 
@@ -1354,26 +1294,37 @@ asm_replacement_out:
        mtctr   itmp3
        bctr
 
-/*********************************************************************/
-/*
+#endif /* defined(ENABLE_REPLACEMENT) */
+
+/* asm_cacheflush **************************************************************
+       copied from linux/arch/ppc64/kernel/vdso64/cacheflush.S
+       assumes 128 byte cache line size.
+       All registers used may be trashed for fun and profit.
+*******************************************************************************/
+
+       .section ".opd","aw"
+       .align 3
 asm_cacheflush:
-       .quad .asm_cacheflush,.TOC.@tocbase,0
-       .previous
-       .size asm_cacheflush,24
-       .type .asm_cacheflush,@function
-       .globl .asm_cacheflush
-*/
+               .quad   .asm_cacheflush,.TOC.@tocbase,0
+               .previous
+               .size asm_cacheflush, 24
+               .type .asm_cacheflush,@function
+               .globl .asm_cacheflush 
 .asm_cacheflush:
+       /* construct the AND mask */
+       li      r6,-0x8000              /* sign-extends to 0xffffffffffff8000   */
+       ori     r6,r6,0xff80            /* r6 = ~127, mask for 128-byte lines   */
+
        add     r4,r3,r4
-       rldimi  r3,r3,0,26
-       addi    r4,r4,31
-       rldimi  r4,r4,0,26
+       and.    r3,r3,r6
+       addi    r4,r4,127
+       and.    r4,r4,r6
        mr      r5,r3
 1:
        cmpld   r3,r4
        bge     0f
        dcbst   0,r3
-       addi    r3,r3,32
+       addi    r3,r3,128
        b       1b
 0:
        sync
@@ -1381,16 +1332,26 @@ asm_cacheflush:
        cmpld   r5,r4
        bge     0f
        icbi    0,r5
-       addi    r5,r5,32
+       addi    r5,r5,128
        b       1b
 0:
        sync
        isync
        blr
 
-
+/*
+               asm_getclassvalues_atomic 
+*/
+       .section ".opd","aw"
+       .align 3
 asm_getclassvalues_atomic:
-.globl .asm_getclassvalues_atomic
+               .quad   .asm_getclassvalues_atomic,.TOC.@tocbase,0
+               .previous
+               .size asm_getclassvalues_atomic, 24
+               .type .asm_getclassvalues_atomic,@function
+               .globl .asm_getclassvalues_atomic
+.asm_getclassvalues_atomic:
+
 _crit_restart:
 _crit_begin:
        lwz     r6,offbaseval(r3)
@@ -1494,7 +1455,6 @@ L_jit_asm_compile$lazy_ptr:
        .indirect_symbol _jit_asm_compile
        .long dyld_stub_binding_helper
 
-
 .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
        .align 2
 L_stacktrace_remove_stackframeinfo$stub: